static int t_localedata_init(t_localedata *self, PyObject *args, PyObject *kwds) { charsArg id; switch (PyTuple_Size(args)) { case 1: if (!parseArgs(args, "n", &id)) { ULocaleData *locale_data; INT_STATUS_CALL(locale_data = ulocdata_open(id, &status)); self->object = locale_data; self->locale_id = strdup((const char *) id); self->flags = T_OWNED; break; } PyErr_SetArgsError((PyObject *) self, "__init__", args); return -1; default: PyErr_SetArgsError((PyObject *) self, "__init__", args); return -1; } if (self->object) return 0; return -1; }
void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) { LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status)); if (U_FAILURE(status)) { return; } UnicodeSet exemplars; ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status); if (U_SUCCESS(status)) { initialLabels_->addAll(exemplars); return; } status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR // The locale data did not include explicit Index characters. // Synthesize a set of them from the locale's standard exemplar characters. ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status); if (U_FAILURE(status)) { return; } // question: should we add auxiliary exemplars? if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) { exemplars.add(0x61, 0x7A); } if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables // cut down to small list exemplars.remove(0xAC00, 0xD7A3). add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C). add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544). add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0). add(0xD30C).add(0xD558); } if (exemplars.containsSome(0x1200, 0x137F)) { // Ethiopic block // cut down to small list // make use of the fact that Ethiopic is allocated in 8's, where // the base is 0 mod 8. UnicodeSet ethiopic( UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status); UnicodeSetIterator it(ethiopic); while (it.next() && !it.isString()) { if ((it.getCodepoint() & 0x7) != 0) { exemplars.remove(it.getCodepoint()); } } } // Upper-case any that aren't already so. // (We only do this for synthesized index characters.) UnicodeSetIterator it(exemplars); UnicodeString upperC; while (it.next()) { const UnicodeString &exemplarC = it.getString(); upperC = exemplarC; upperC.toUpper(locale); initialLabels_->add(upperC); } }
static bool __CFLocaleCopyExemplarCharSet(CFLocaleRef locale, bool user, CFTypeRef *cf, CFStringRef context) { char localeID[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY]; if (CFStringGetCString(locale->_identifier, localeID, sizeof(localeID)/sizeof(char), kCFStringEncodingASCII)) { UErrorCode icuStatus = U_ZERO_ERROR; ULocaleData* uld = ulocdata_open(localeID, &icuStatus); USet *set = ulocdata_getExemplarSet(uld, NULL, USET_ADD_CASE_MAPPINGS, ULOCDATA_ES_STANDARD, &icuStatus); ulocdata_close(uld); if (U_FAILURE(icuStatus)) return false; if (icuStatus == U_USING_DEFAULT_WARNING) // If default locale used, force to empty set uset_clear(set); *cf = (CFTypeRef) _CFCreateCharacterSetFromUSet(set); uset_close(set); return (*cf != NULL); } return false; }
void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) { if (U_FAILURE(status)) { return; } // Chinese index characters, which are specific to each of the several Chinese tailorings, // take precedence over the single locale data exemplar set per language. const char *language = locale.getLanguage(); if (uprv_strcmp(language, "zh") == 0 || uprv_strcmp(language, "ja") == 0 || uprv_strcmp(language, "ko") == 0) { // TODO: This should be done regardless of the language, but it's expensive. // We should add a Collator function (can be @internal) // to enumerate just the contractions that start with a given code point or string. if (addChineseIndexCharacters(status) || U_FAILURE(status)) { return; } } LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status)); if (U_FAILURE(status)) { return; } UnicodeSet exemplars; ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status); if (U_SUCCESS(status)) { initialLabels_->addAll(exemplars); return; } status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR // The locale data did not include explicit Index characters. // Synthesize a set of them from the locale's standard exemplar characters. ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status); if (U_FAILURE(status)) { return; } // question: should we add auxiliary exemplars? if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) { exemplars.add(0x61, 0x7A); } if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables // cut down to small list exemplars.remove(0xAC00, 0xD7A3). add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C). add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544). add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0). add(0xD30C).add(0xD558); } if (exemplars.containsSome(0x1200, 0x137F)) { // Ethiopic block // cut down to small list // make use of the fact that Ethiopic is allocated in 8's, where // the base is 0 mod 8. UnicodeSet ethiopic( UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status); UnicodeSetIterator it(ethiopic); while (it.next() && !it.isString()) { if ((it.getCodepoint() & 0x7) != 0) { exemplars.remove(it.getCodepoint()); } } } // Upper-case any that aren't already so. // (We only do this for synthesized index characters.) UnicodeSetIterator it(exemplars); UnicodeString upperC; while (it.next()) { const UnicodeString &exemplarC = it.getString(); upperC = exemplarC; upperC.toUpper(locale); initialLabels_->add(upperC); } }
void AlphabeticIndex::getIndexExemplars(UnicodeSet &dest, const Locale &locale, UErrorCode &status) { if (U_FAILURE(status)) { return; } LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status)); UnicodeSet exemplars; ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status); if (U_SUCCESS(status)) { dest.addAll(exemplars); return; } status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR // Locale data did not include explicit Index characters. // Synthesize a set of them from the locale's standard exemplar characters. ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status); if (U_FAILURE(status)) { return; } // Upper-case any that aren't already so. // (We only do this for synthesized index characters.) UnicodeSetIterator it(exemplars); UnicodeString upperC; UnicodeSet lowersToRemove; UnicodeSet uppersToAdd; while (it.next()) { const UnicodeString &exemplarC = it.getString(); upperC = exemplarC; upperC.toUpper(locale); if (exemplarC != upperC) { lowersToRemove.add(exemplarC); uppersToAdd.add(upperC); } } exemplars.removeAll(lowersToRemove); exemplars.addAll(uppersToAdd); // get the exemplars, and handle special cases // question: should we add auxiliary exemplars? if (exemplars.containsSome(*CORE_LATIN)) { exemplars.addAll(*CORE_LATIN); } if (exemplars.containsSome(*HANGUL)) { // cut down to small list UnicodeSet BLOCK_HANGUL_SYLLABLES(UNICODE_STRING_SIMPLE("[:block=hangul_syllables:]"), status); exemplars.removeAll(BLOCK_HANGUL_SYLLABLES); exemplars.addAll(*HANGUL); } if (exemplars.containsSome(*ETHIOPIC)) { // cut down to small list // make use of the fact that Ethiopic is allocated in 8's, where // the base is 0 mod 8. UnicodeSetIterator it(*ETHIOPIC); while (it.next() && !it.isString()) { if ((it.getCodepoint() & 0x7) != 0) { exemplars.remove(it.getCodepoint()); } } } dest.addAll(exemplars); }