// Returns a hard-coded fallback font family name for code points that live in
// emoji / symbol Unicode blocks, or 0 when the block has no special mapping.
//
// ucs4        - code point whose Unicode block is looked up via ublock_getCode().
// fontManager - passed to isFontPresent() to probe for the emoji candidates.
//               Only the first call performs the probe; the result is cached
//               for all later calls regardless of the manager passed in.
//
// For UBLOCK_EMOTICONS the return value is the first entry of emojiFonts that
// isFontPresent() accepted, or 0 if none was accepted.
const UChar* getFontBasedOnUnicodeBlock(int ucs4, SkFontMgr* fontManager)
{
    static const UChar* emojiFonts[] = {L"Segoe UI Emoji", L"Segoe UI Symbol"};
    static const UChar* symbolFont = L"Segoe UI Symbol";
    // This has to be static: the probe below runs exactly once (guarded by
    // |initialized|), so a plain local would be reset to 0 on every later call
    // and emoticons would never get a font after the first lookup.
    static const UChar* emojiFont = 0;
    static bool initialized = false;
    if (!initialized) {
        for (size_t i = 0; i < WTF_ARRAY_LENGTH(emojiFonts); i++) {
            if (isFontPresent(emojiFonts[i], fontManager)) {
                emojiFont = emojiFonts[i];
                break;
            }
        }
        initialized = true;
    }

    UBlockCode block = ublock_getCode(ucs4);
    switch (block) {
    case UBLOCK_EMOTICONS:
        return emojiFont;
    case UBLOCK_PLAYING_CARDS:
    case UBLOCK_MISCELLANEOUS_SYMBOLS:
    case UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS:
    case UBLOCK_TRANSPORT_AND_MAP_SYMBOLS:
    case UBLOCK_ALCHEMICAL_SYMBOLS:
    case UBLOCK_RUNIC:
    case UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS:
    case UBLOCK_DINGBATS:
        return symbolFont;
    default:
        return 0;
    }
}
// There are a lot of characters in USCRIPT_COMMON that can be covered // by fonts for scripts closely related to them. See // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:] // FIXME: make this more efficient with a wider coverage UScriptCode getScriptBasedOnUnicodeBlock(int ucs4) { UBlockCode block = ublock_getCode(ucs4); switch (block) { case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: return USCRIPT_HAN; case UBLOCK_HIRAGANA: case UBLOCK_KATAKANA: return USCRIPT_KATAKANA_OR_HIRAGANA; case UBLOCK_ARABIC: return USCRIPT_ARABIC; case UBLOCK_THAI: return USCRIPT_THAI; case UBLOCK_GREEK: return USCRIPT_GREEK; case UBLOCK_DEVANAGARI: // For Danda and Double Danda (U+0964, U+0965), use a Devanagari // font for now although they're used by other scripts as well. // Without a context, we can't do any better. return USCRIPT_DEVANAGARI; case UBLOCK_ARMENIAN: return USCRIPT_ARMENIAN; case UBLOCK_GEORGIAN: return USCRIPT_GEORGIAN; case UBLOCK_KANNADA: return USCRIPT_KANNADA; case UBLOCK_GOTHIC: return USCRIPT_GOTHIC; default: return USCRIPT_COMMON; } }
// Returns the ICU block code for |ch| as an int, substituting UBLOCK_NO_BLOCK
// whenever ublock_getCode() reports UBLOCK_INVALID_CODE.
int UnicodeBlockSet::FindBlockIndex(UChar32 ch)
{
    const int code = ublock_getCode(ch);
    return (code == UBLOCK_INVALID_CODE) ? static_cast<int>(UBLOCK_NO_BLOCK) : code;
}
// Creates a dictionary-based break engine for the script of |c|.
//
// Returns NULL when:
//   - uscript_getScript() reports an error,
//   - loadDictionaryMatcherFor() yields no matcher for the script/breakType,
//   - the script has no engine case below, or
//   - engine construction left a failure code in |status|.
//
// The matcher is deleted here only if no engine object was created; otherwise
// it has been passed to the engine constructor (presumably the engine then
// owns it -- confirm against the engine classes).
const LanguageBreakEngine *
ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
    UErrorCode status = U_ZERO_ERROR;
    UScriptCode code = uscript_getScript(c, &status);
    if (!U_SUCCESS(status)) {
        return NULL;
    }

    DictionaryMatcher *matcher = loadDictionaryMatcherFor(code, breakType);
    if (matcher == NULL) {
        return NULL;
    }

    const LanguageBreakEngine *engine = NULL;
    switch (code) {
    case USCRIPT_THAI:
        engine = new ThaiBreakEngine(matcher, status);
        break;
    case USCRIPT_KHMER:
        engine = new KhmerBreakEngine(matcher, status);
        break;

#if !UCONFIG_NO_NORMALIZATION
    // CJK not available w/o normalization
    case USCRIPT_HANGUL:
        engine = new CjkBreakEngine(matcher, kKorean, status);
        break;

    // Chinese and Japanese share one BreakEngine and one dictionary.
    case USCRIPT_HIRAGANA:
    case USCRIPT_KATAKANA:
    case USCRIPT_HAN:
        engine = new CjkBreakEngine(matcher, kChineseJapanese, status);
        break;
#if 0
    // TODO: Have to get some characters with script=common handled
    // by CjkBreakEngine (e.g. U+309B). Simply subjecting
    // them to CjkBreakEngine does not work. The engine has to
    // special-case them.
    // NOTE(review): disabled code; as written it passes the script code (not
    // the character) to ublock_getCode() and refers to an undeclared |dict|.
    case USCRIPT_COMMON:
    {
        UBlockCode block = ublock_getCode(code);
        if (block == UBLOCK_HIRAGANA || block == UBLOCK_KATAKANA)
            engine = new CjkBreakEngine(dict, kChineseJapanese, status);
        break;
    }
#endif
#endif

    default:
        break;
    }

    if (engine == NULL) {
        // Nothing received the matcher, so release it here.
        delete matcher;
        return NULL;
    }
    if (U_FAILURE(status)) {
        // Construction reported an error: discard the half-built engine.
        delete engine;
        return NULL;
    }
    return engine;
}
// Builds m_block_map by scanning code points 0 .. 0x110000 (inclusive) and
// recording, for each block code, the first and last code point of the run in
// which ublock_getCode() returned it. Entries keyed by UBLOCK_INVALID_CODE and
// UBLOCK_NO_BLOCK are removed afterwards, then InitBlockDescriptions() runs.
UnicodeBlockSet::UnicodeBlockSet()
{
    int previousBlock = UBLOCK_INVALID_CODE;

    // NOTE(review): the upper bound is one past U+10FFFF; presumably this
    // extra step exists so the final block's end gets written -- it relies on
    // ublock_getCode() returning a different code for 0x110000. Confirm.
    UChar32 cp = 0;
    while (cp <= 0x110000) {
        const int currentBlock = ublock_getCode(cp);
        if (currentBlock != previousBlock) {
            // Close the run that just ended ...
            m_block_map[previousBlock].second = cp - 1;
            // ... and open the one that starts here.
            m_block_map[currentBlock].first = cp;
            previousBlock = currentBlock;
        }
        ++cp;
    }

    m_block_map.erase(UBLOCK_INVALID_CODE);
    m_block_map.erase(UBLOCK_NO_BLOCK);

    InitBlockDescriptions();
}
// IntlChar::getBlockCode: returns the result of ublock_getCode() for the code
// point extracted from |arg|.
// NOTE(review): GETCP is a macro defined elsewhere; it is assumed to declare
// |cp| from |arg| (and may return early on bad input) -- confirm.
Variant HHVM_STATIC_METHOD(IntlChar, getBlockCode, const Variant& arg) {
  GETCP(arg, cp);
  const auto blockCode = ublock_getCode(cp);
  return blockCode;
}
// JNI helper: returns the ICU block code of |codePoint| as an int.
// The JNIEnv* and jclass arguments are unused.
static int Character_ofImpl(JNIEnv*, jclass, jint codePoint) {
    const int blockCode = ublock_getCode(codePoint);
    return blockCode;
}
//static int Character_ofImpl(JNIEnv*, jclass, jint codePoint) { JNIEXPORT jint JNICALL Java_java_lang_Character_ofImpl(JNIEnv*, jclass, jint codePoint) { return ublock_getCode(codePoint); }
// Finds a fallback font for the first character of |characters|.
//
// description      - font description used as the template for the lookup.
// originalFontData - font that failed to render the character; its platform
//                    data seeds the MLang lookup and a result with the same
//                    hash is rejected.
// characters       - text to cover; only characters[0] is examined.
// length           - unused here.
//
// The family name is taken from MLang font linking when the font-link
// interface is available, then from FontPlatformData::mapKnownFont(), then
// from FontPlatformData::defaultFontFamily(). Returns 0 when no family name
// could be determined, when no cached platform data exists for it, or when the
// result hashes the same as the original font.
PassRefPtr<SimpleFontData> FontCache::systemFallbackForCharacters(const FontDescription& description, const SimpleFontData* originalFontData, bool, const UChar* characters, int length)
{
    String familyName;
    WCHAR name[LF_FACESIZE];

    UChar character = characters[0];
    const FontPlatformData& origFont = originalFontData->platformData();

    if (IMLangFontLinkType* langFontLink = getFontLinkInterface()) {
        HGDIOBJ oldFont = GetCurrentObject(g_screenDC, OBJ_FONT);
        HFONT hfont = 0;
        DWORD codePages = 0;

        // Try MLang font linking first.
        langFontLink->GetCharCodePages(character, &codePages);
        if (codePages && u_getIntPropertyValue(character, UCHAR_UNIFIED_IDEOGRAPH)) {
            // The CJK character may belong to multiple code pages. We want to
            // do font linking against a single one of them, preferring the default
            // code page for the user's locale.
            const Vector<DWORD, 4>& CJKCodePageMasks = getCJKCodePageMasks();
            unsigned numCodePages = CJKCodePageMasks.size();
            for (unsigned i = 0; i < numCodePages; ++i) {
                hfont = createMLangFont(langFontLink, g_screenDC, origFont, CJKCodePageMasks[i]);
                if (!hfont)
                    continue;

                SelectObject(g_screenDC, hfont);
                GetTextFace(g_screenDC, LF_FACESIZE, name);

                if (!(codePages & CJKCodePageMasks[i])) {
                    // We asked about a code page that is not one of the code pages
                    // returned by MLang, so the font might not contain the character.
                    if (!currentFontContainsCharacter(langFontLink, g_screenDC, hfont, character, name)) {
                        SelectObject(g_screenDC, oldFont);
                        langFontLink->ReleaseFont(hfont);
                        hfont = 0;
                        continue;
                    }
                }
                break;
            }
        } else {
            hfont = createMLangFont(langFontLink, g_screenDC, origFont, codePages, character);
            SelectObject(g_screenDC, hfont);
            GetTextFace(g_screenDC, LF_FACESIZE, name);
        }
        // Always restore the font that was selected on entry.
        SelectObject(g_screenDC, oldFont);

        if (hfont) {
            familyName = name;
            langFontLink->ReleaseFont(hfont);
        } else
            FontPlatformData::mapKnownFont(codePages, familyName);
    }

    if (familyName.isEmpty())
        familyName = FontPlatformData::defaultFontFamily();

    if (!familyName.isEmpty()) {
        // FIXME: temporary workaround for Thai font problem
        FontDescription fontDescription(description);
        // The character being examined is |character|; this function declares
        // no variable named |c|.
        if (ublock_getCode(character) == UBLOCK_THAI && fontDescription.weight() > FontWeightNormal)
            fontDescription.setWeight(FontWeightNormal);

        FontPlatformData* result = getCachedFontPlatformData(fontDescription, familyName);
        // Handing back the same font that already failed would be pointless.
        if (result && result->hash() != origFont.hash()) {
            if (RefPtr<SimpleFontData> fontData = getCachedFontData(result, DoNotRetain))
                return fontData.release();
        }
    }

    return 0;
}
// Returns the ICU block code of |codePoint| as an int.
int fastiva_vm_Character_C$__ofImpl(jint codePoint) {
    const int blockCode = ublock_getCode(codePoint);
    return blockCode;
}
U_CAPI int32_t U_EXPORT2 u_getIntPropertyValue(UChar32 c, UProperty which) { UErrorCode errorCode; if(which<UCHAR_BINARY_START) { return 0; /* undefined */ } else if(which<UCHAR_BINARY_LIMIT) { return (int32_t)u_hasBinaryProperty(c, which); } else if(which<UCHAR_INT_START) { return 0; /* undefined */ } else if(which<UCHAR_INT_LIMIT) { switch(which) { case UCHAR_BIDI_CLASS: return (int32_t)u_charDirection(c); case UCHAR_BLOCK: return (int32_t)ublock_getCode(c); #if !UCONFIG_NO_NORMALIZATION case UCHAR_CANONICAL_COMBINING_CLASS: return u_getCombiningClass(c); #endif case UCHAR_DECOMPOSITION_TYPE: return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_DT_MASK); case UCHAR_EAST_ASIAN_WIDTH: return (int32_t)(u_getUnicodeProperties(c, 0)&UPROPS_EA_MASK)>>UPROPS_EA_SHIFT; case UCHAR_GENERAL_CATEGORY: return (int32_t)u_charType(c); case UCHAR_JOINING_GROUP: return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c); case UCHAR_JOINING_TYPE: return ubidi_getJoiningType(GET_BIDI_PROPS(), c); case UCHAR_LINE_BREAK: return (int32_t)(u_getUnicodeProperties(c, UPROPS_LB_VWORD)&UPROPS_LB_MASK)>>UPROPS_LB_SHIFT; case UCHAR_NUMERIC_TYPE: { int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getUnicodeProperties(c, -1)); return UPROPS_NTV_GET_TYPE(ntv); } case UCHAR_SCRIPT: errorCode=U_ZERO_ERROR; return (int32_t)uscript_getScript(c, &errorCode); case UCHAR_HANGUL_SYLLABLE_TYPE: { /* see comments on gcbToHst[] above */ int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT; if(gcb<LENGTHOF(gcbToHst)) { return gcbToHst[gcb]; } else { return U_HST_NOT_APPLICABLE; } } #if !UCONFIG_NO_NORMALIZATION case UCHAR_NFD_QUICK_CHECK: case UCHAR_NFKD_QUICK_CHECK: case UCHAR_NFC_QUICK_CHECK: case UCHAR_NFKC_QUICK_CHECK: return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD)); case UCHAR_LEAD_CANONICAL_COMBINING_CLASS: return getFCD16(c)>>8; case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS: return getFCD16(c)&0xff; #endif case 
UCHAR_GRAPHEME_CLUSTER_BREAK: return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT; case UCHAR_SENTENCE_BREAK: return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_SB_MASK)>>UPROPS_SB_SHIFT; case UCHAR_WORD_BREAK: return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_WB_MASK)>>UPROPS_WB_SHIFT; default: return 0; /* undefined */ } } else if(which==UCHAR_GENERAL_CATEGORY_MASK) {
// FIXME: // - Handle 'Inherited', 'Common' and 'Unknown' // (see http://www.unicode.org/reports/tr24/#Usage_Model ) // For 'Inherited' and 'Common', perhaps we need to // accept another parameter indicating the previous family // and just return it. // - All the characters (or characters up to the point a single // font can cover) need to be taken into account const UChar* getFallbackFamily(const UChar* characters, int length, FontDescription::GenericFamilyType generic, UChar32* charChecked, UScriptCode* scriptChecked) { ASSERT(characters && characters[0] && length > 0); UScriptCode script = USCRIPT_COMMON; // Sometimes characters common to script (e.g. space) is at // the beginning of a string so that we need to skip them // to get a font required to render the string. int i = 0; UChar32 ucs4 = 0; while (i < length && script == USCRIPT_COMMON || script == USCRIPT_INVALID_CODE) { U16_NEXT(characters, i, length, ucs4); UErrorCode err = U_ZERO_ERROR; script = uscript_getScript(ucs4, &err); // silently ignore the error } // For the full-width ASCII characters (U+FF00 - U+FF5E), use the font for // Han (determined in a locale-dependent way above). Full-width ASCII // characters are rather widely used in Japanese and Chinese documents and // they're fully covered by Chinese, Japanese and Korean fonts. if (0xFF00 < ucs4 && ucs4 < 0xFF5F) script = USCRIPT_HAN; // There are a lot of characters in USCRIPT_COMMON that can be covered // by fonts for scripts closely related to them. 
See // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:] // FIXME: make this more efficient with a wider coverage if (script == USCRIPT_COMMON || script == USCRIPT_INHERITED) { UBlockCode block = ublock_getCode(ucs4); switch (block) { case UBLOCK_BASIC_LATIN: script = USCRIPT_LATIN; break; case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: script = USCRIPT_HAN; break; case UBLOCK_HIRAGANA: case UBLOCK_KATAKANA: script = USCRIPT_HIRAGANA; break; case UBLOCK_ARABIC: script = USCRIPT_ARABIC; break; case UBLOCK_GREEK: script = USCRIPT_GREEK; break; case UBLOCK_DEVANAGARI: // For Danda and Double Danda (U+0964, U+0965), use a Devanagari // font for now although they're used by other scripts as well. // Without a context, we can't do any better. script = USCRIPT_DEVANAGARI; break; case UBLOCK_ARMENIAN: script = USCRIPT_ARMENIAN; break; case UBLOCK_GEORGIAN: script = USCRIPT_GEORGIAN; break; case UBLOCK_KANNADA: script = USCRIPT_KANNADA; break; } } // Another lame work-around to cover non-BMP characters. const UChar* family = getFontFamilyForScript(script, generic); if (!family) { int plane = ucs4 >> 16; switch (plane) { case 1: family = L"code2001"; break; case 2: family = L"simsun-extb"; break; default: family = L"lucida sans unicode"; } }
// Thin forwarding wrapper: returns ublock_getCode(c) unchanged.
UBlockCode __hs_ublock_getCode(UChar32 c)
{
    const UBlockCode blockCode = ublock_getCode(c);
    return blockCode;
}