void Collator::createCollator() const { ASSERT(!m_collator); UErrorCode status = U_ZERO_ERROR; { Locker<Mutex> lock(cachedCollatorMutex()); if (cachedCollator) { const char* cachedCollatorLocale = ucol_getLocaleByType(cachedCollator, ULOC_REQUESTED_LOCALE, &status); ASSERT(U_SUCCESS(status)); ASSERT(cachedCollatorLocale); UColAttributeValue cachedCollatorLowerFirst = ucol_getAttribute(cachedCollator, UCOL_CASE_FIRST, &status); ASSERT(U_SUCCESS(status)); // FIXME: default locale is never matched, because ucol_getLocaleByType returns the actual one used, not 0. if (m_locale && 0 == strcmp(cachedCollatorLocale, m_locale) && ((UCOL_LOWER_FIRST == cachedCollatorLowerFirst && m_lowerFirst) || (UCOL_UPPER_FIRST == cachedCollatorLowerFirst && !m_lowerFirst))) { m_collator = cachedCollator; cachedCollator = 0; return; } } } m_collator = ucol_open(m_locale, &status); if (U_FAILURE(status)) { status = U_ZERO_ERROR; m_collator = ucol_open("", &status); // Fallback to Unicode Collation Algorithm. } ASSERT(U_SUCCESS(status)); ucol_setAttribute(m_collator, UCOL_CASE_FIRST, m_lowerFirst ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST, &status); ASSERT(U_SUCCESS(status)); }
void Target::setTargetString(const UnicodeString *target) { if (charBreakIterator != NULL) { ubrk_close(charBreakIterator); ucol_closeElements(elements); } targetString = target; if (targetString != NULL) { UErrorCode status = U_ZERO_ERROR; targetBuffer = targetString->getBuffer(); targetLength = targetString->length(); elements = ucol_openElements(coll, target->getBuffer(), target->length(), &status); ucol_forceHanImplicit(elements, &status); charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status), targetBuffer, targetLength, &status); } else { targetBuffer = NULL; targetLength = 0; } }
// Returns the collator's locale of the requested kind as a Locale object.
//
// @param type    which locale to report (passed through to ucol_getLocaleByType)
// @param status  ICU error code, passed through to ucol_getLocaleByType
// @return a Locale built from the reported name, or a bogus Locale when the
//         underlying call reports no name (NULL)
const Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const
{
    const char *localeName = ucol_getLocaleByType(ucollator, type, &status);
    if (localeName != NULL) {
        return Locale(localeName);
    }

    // No name available: hand back an explicitly bogus locale.
    Locale bogus("");
    bogus.setToBogus();
    return bogus;
}
// Collator.actual_locale {{{ static PyObject * icu_Collator_actual_locale(icu_Collator *self, void *closure) { const char *loc = NULL; UErrorCode status = U_ZERO_ERROR; loc = ucol_getLocaleByType(self->collator, ULOC_ACTUAL_LOCALE, &status); if (loc == NULL || U_FAILURE(status)) { PyErr_SetString(PyExc_Exception, "Failed to get actual locale"); return NULL; } return Py_BuildValue("s", loc); }
// Collator::getLocale(type): returns the collator's locale name of the given
// kind. The previous error state is cleared first; if ICU reports a failure it
// is recorded on the collator data, and the pointer ICU returned is still
// wrapped into the result String as-is.
static String HHVM_METHOD(Collator, getLocale, int64_t type) {
  FETCH_COL(data, this_, "");
  data->clearError();

  UErrorCode status = U_ZERO_ERROR;
  const auto localeType = static_cast<ULocDataLocaleType>(type);
  auto localeName = ucol_getLocaleByType(data->collator(), localeType, &status);

  if (U_FAILURE(status)) {
    data->setError(status, "Error getting locale by type");
  }
  return String(localeName, CopyString);
}
// Collator.display_name {{{ static PyObject * icu_Collator_display_name(icu_Collator *self, void *closure) { const char *loc = NULL; UErrorCode status = U_ZERO_ERROR; UChar dname[400]; int32_t sz = 0; loc = ucol_getLocaleByType(self->collator, ULOC_ACTUAL_LOCALE, &status); if (loc == NULL) { PyErr_SetString(PyExc_Exception, "Failed to get actual locale"); return NULL; } sz = ucol_getDisplayName(loc, "en", dname, sizeof(dname), &status); if (U_FAILURE(status)) {PyErr_SetString(PyExc_ValueError, u_errorName(status)); return NULL; } return icu_to_python(dname, sz); }
// Returns the collator's locale name of the requested kind.
//
// Raises an error (and returns "") when the collator has not been initialized.
// Otherwise clears the stored error state, queries ICU, and returns a copy of
// the reported name; on an ICU failure the error is recorded and "" is
// returned instead.
String c_Collator::t_getlocale(int64_t type /* = 0 */) {
  if (!m_ucoll) {
    raise_error("getlocale called on uninitialized Collator object");
    return "";
  }

  m_errcode.clearError();

  UErrorCode status = U_ZERO_ERROR;
  const char* localeName = ucol_getLocaleByType(
    m_ucoll, static_cast<ULocDataLocaleType>(type), &status);
  String result(const_cast<char*>(localeName), CopyString);

  if (U_SUCCESS(status)) {
    return result;
  }

  m_errcode.setError(status, "Error getting locale by type");
  return "";
}
// Returns the collator's locale name of the requested kind.
//
// Emits a warning (and returns "") when the collator has not been initialized.
// Otherwise clears m_errcode, lets ICU write its status straight into
// m_errcode.code, and returns a copy of the reported name. On an ICU failure
// the code and message are stored on this object, mirrored into s_intl_error,
// and "" is returned.
String c_Collator::t_getlocale(int64_t type /* = 0 */) {
  if (!m_ucoll) {
    raise_warning("getlocale called on uninitialized Collator object");
    return "";
  }

  m_errcode.clear();

  const char* localeName = ucol_getLocaleByType(
    m_ucoll, static_cast<ULocDataLocaleType>(type), &(m_errcode.code));
  String result(const_cast<char*>(localeName), CopyString);

  if (U_SUCCESS(m_errcode.code)) {
    return result;
  }

  m_errcode.custom_error_message = "Error getting locale by type";
  s_intl_error->m_error.code = m_errcode.code;
  s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
  return "";
}
// Returns the collator's locale name of the requested kind.
//
// Emits a warning (and returns "") when the collator has not been initialized.
// Otherwise clears m_errcode, lets ICU write its status straight into
// m_errcode.code, and returns the reported name. On an ICU failure the code and
// message are stored on this object, mirrored into s_intl_error, and "" is
// returned.
String c_Collator::t_getlocale(int64 type /* = 0 */) {
  INSTANCE_METHOD_INJECTION_BUILTIN(Collator, Collator::getlocale);
  if (!m_ucoll) {
    raise_warning("getlocale called on uninitialized Collator object");
    return "";
  }
  m_errcode.clear();
  // Copy the name rather than attaching to it as a literal: the buffer comes
  // from the ICU collator (presumably owned by it - confirm against the ICU
  // docs), so the returned String must not alias it once the collator may be
  // closed. The other t_getlocale implementations use CopyString as well.
  String ret(
    (char*)ucol_getLocaleByType(m_ucoll, (ULocDataLocaleType)type,
                                &(m_errcode.code)),
    CopyString);
  if (U_FAILURE(m_errcode.code)) {
    m_errcode.custom_error_message = "Error getting locale by type";
    s_intl_error->m_error.code = m_errcode.code;
    s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
    return "";
  }
  return ret;
}
// Collator.display_name {{{ static PyObject * icu_Collator_display_name(icu_Collator *self, void *closure) { const char *loc = NULL; UErrorCode status = U_ZERO_ERROR; UChar dname[400]; char buf[100]; loc = ucol_getLocaleByType(self->collator, ULOC_ACTUAL_LOCALE, &status); if (loc == NULL || U_FAILURE(status)) { PyErr_SetString(PyExc_Exception, "Failed to get actual locale"); return NULL; } ucol_getDisplayName(loc, "en", dname, 100, &status); if (U_FAILURE(status)) return PyErr_NoMemory(); u_strToUTF8(buf, 100, NULL, dname, -1, &status); if (U_FAILURE(status)) { PyErr_SetString(PyExc_Exception, "Failed to convert dname to UTF-8"); return NULL; } return Py_BuildValue("s", buf); }
// Straightforward collation-element search used as a reference implementation.
//
// Builds the collation-order lists for target (starting at offset) and for
// pattern, then tries every position in the target order list in turn. A
// candidate is accepted only if it starts and ends on character-break
// boundaries and does not begin or end inside an expansion.
//
// @param coll        collator used to build the order lists and choose the
//                    break-iterator locale
// @param target      text to search in
// @param offset      passed to the target OrderList constructor
// @param pattern     text to search for
// @param matchStart  out: start offset of the match, or -1 if none
// @param matchEnd    out: end offset of the match, or -1 if none
// @return TRUE when a match was found, FALSE otherwise (including for an empty
//         pattern or when the break iterator cannot be created)
static UBool simpleSearch(UCollator *coll, const UnicodeString &target, int32_t offset,
                          const UnicodeString &pattern, int32_t &matchStart, int32_t &matchEnd)
{
    UErrorCode status = U_ZERO_ERROR;
    OrderList targetOrders(coll, target, offset);
    OrderList patternOrders(coll, pattern);
    int32_t targetSize  = targetOrders.size() - 1;
    int32_t patternSize = patternOrders.size() - 1;

    matchStart = matchEnd = -1;

    if (patternSize == 0) {
        // Searching for an empty pattern always fails; decided before the
        // break iterator is opened so nothing needs to be released.
        return FALSE;
    }

    UBreakIterator *charBreakIterator = ubrk_open(UBRK_CHARACTER,
                                                  ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status),
                                                  target.getBuffer(), target.length(), &status);

    if (U_FAILURE(status) || charBreakIterator == NULL) {
        // Without a usable break iterator no boundary can be verified, so
        // report "no match" instead of calling into a NULL iterator.
        ubrk_close(charBreakIterator);
        return FALSE;
    }

    for(int32_t i = 0; i < targetSize; i += 1) {
        if (targetOrders.matchesAt(i, patternOrders)) {
            int32_t start    = targetOrders.getLowOffset(i);
            int32_t maxLimit = targetOrders.getLowOffset(i + patternSize);
            int32_t minLimit = targetOrders.getLowOffset(i + patternSize - 1);

            // if the low and high offsets of the first CE in
            // the match are the same, it means that the match
            // starts in the middle of an expansion - all but
            // the first CE of the expansion will have the offset
            // of the following character.
            if (start == targetOrders.getHighOffset(i)) {
                continue;
            }

            // Make sure match starts on a grapheme boundary
            if (! ubrk_isBoundary(charBreakIterator, start)) {
                continue;
            }

            // If the low and high offsets of the CE after the match
            // are the same, it means that the match ends in the middle
            // of an expansion sequence.
            if (maxLimit == targetOrders.getHighOffset(i + patternSize) &&
                targetOrders.getOrder(i + patternSize) != UCOL_NULLORDER) {
                continue;
            }

            int32_t mend = maxLimit;

            // Find the first grapheme break after the character index
            // of the last CE in the match. If it's after character index
            // that's after the last CE in the match, use that index
            // as the end of the match.
            if (minLimit < maxLimit) {
                // When the last CE's low index is same with its high index, the CE is likely
                // a part of expansion. In this case, the index is located just after the
                // character corresponding to the CEs compared above. If the index is right
                // at the break boundary, move the position to the next boundary will result
                // incorrect match length when there are ignorable characters exist between
                // the position and the next character produces CE(s). See ticket#8482.
                if (minLimit == targetOrders.getHighOffset(i + patternSize - 1) &&
                    ubrk_isBoundary(charBreakIterator, minLimit)) {
                    mend = minLimit;
                } else {
                    int32_t nba = ubrk_following(charBreakIterator, minLimit);

                    if (nba >= targetOrders.getHighOffset(i + patternSize - 1)) {
                        mend = nba;
                    }
                }
            }

            if (mend > maxLimit) {
                continue;
            }

            if (! ubrk_isBoundary(charBreakIterator, mend)) {
                continue;
            }

            matchStart = start;
            matchEnd   = mend;

            ubrk_close(charBreakIterator);
            return TRUE;
        }
    }

    ubrk_close(charBreakIterator);
    return FALSE;
}
// Exercises Collator::registerInstance / Collator::unregister.
//
// Part 1 registers a French collator for en_US, checks that a request for
// en_US_FOO resolves to it and reports the expected requested/valid/actual
// locales, then unregisters it and checks that en_US resolves to a collator
// equal to the original again.
// Part 2 registers a French collator for the made-up locale fu_FU, checks the
// available-locale enumeration (including clone()), the display name, and
// ucol_open() for fu_FU_FOO, then unregisters and re-checks.
// The whole body is compiled out when UCONFIG_NO_SERVICE is set.
void CollationServiceTest::TestRegister() {
#if !UCONFIG_NO_SERVICE
    // register a singleton
    const Locale& FR = Locale::getFrance();
    const Locale& US = Locale::getUS();
    const Locale US_FOO("en", "US", "FOO");

    UErrorCode status = U_ZERO_ERROR;

    Collator* frcol = Collator::createInstance(FR, status);
    Collator* uscol = Collator::createInstance(US, status);
    if(U_FAILURE(status)) {
        // Nothing has been registered yet, so both collators are still ours to delete.
        errcheckln(status, "Failed to create collators with %s", u_errorName(status));
        delete frcol;
        delete uscol;
        return;
    }

    { // try override en_US collator
        URegistryKey key = Collator::registerInstance(frcol, US, status);

        // A request for en_US_FOO is expected to yield a collator equal to the registered one.
        Collator* ncol = Collator::createInstance(US_FOO, status);
        if (*frcol != *ncol) {
            errln("register of french collator for en_US failed on request for en_US_FOO");
        }

        // ensure original collator's params not touched
        Locale loc = frcol->getLocale(ULOC_REQUESTED_LOCALE, status);
        if (loc != FR) {
            errln(UnicodeString("fr collator's requested locale changed to ") + loc.getName());
        }
        loc = frcol->getLocale(ULOC_VALID_LOCALE, status);
        if (loc != FR) {
            errln(UnicodeString("fr collator's valid locale changed to ") + loc.getName());
        }

        // The collator created through the service should report the locale it was asked for.
        loc = ncol->getLocale(ULOC_REQUESTED_LOCALE, status);
        if (loc != US_FOO) {
            errln(UnicodeString("requested locale for en_US_FOO is not en_US_FOO but ") + loc.getName());
        }
        loc = ncol->getLocale(ULOC_VALID_LOCALE, status);
        if (loc != US) {
            errln(UnicodeString("valid locale for en_US_FOO is not en_US but ") + loc.getName());
        }
        loc = ncol->getLocale(ULOC_ACTUAL_LOCALE, status);
        if (loc != US) {
            errln(UnicodeString("actual locale for en_US_FOO is not en_US but ") + loc.getName());
        }
        delete ncol; ncol = NULL;

        if (!Collator::unregister(key, status)) {
            errln("failed to unregister french collator");
        }
        // !!! frcol pointer is now invalid !!!

        // With the override gone, en_US should compare equal to the collator created up front.
        ncol = Collator::createInstance(US, status);
        if (*uscol != *ncol) {
            errln("collator after unregister does not match original");
        }
        delete ncol; ncol = NULL;
    }

    // recreate frcol
    frcol = Collator::createInstance(FR, status);

    // Reference fr_FR collator opened through the C API; compared against fu_FU_FOO below.
    LocalUCollatorPointer frFR(ucol_open("fr_FR", &status));

    { // try create collator for new locale
        Locale fu_FU_FOO("fu", "FU", "FOO");
        Locale fu_FU("fu", "FU", "");

        // Created before registration; used at the end to check the state after unregister.
        Collator* fucol = Collator::createInstance(fu_FU, status);
        URegistryKey key = Collator::registerInstance(frcol, fu_FU, status);
        Collator* ncol = Collator::createInstance(fu_FU_FOO, status);
        if (*frcol != *ncol) {
            errln("register of fr collator for fu_FU failed");
        }

        UnicodeString locName = fu_FU.getName();
        StringEnumeration* localeEnum = Collator::getAvailableLocales();
        UBool found = FALSE;
        const UnicodeString* locStr, *ls2;
        // Scan the available locales until fu_FU shows up or the enumeration ends.
        for (locStr = localeEnum->snext(status);
             !found && locStr != NULL;
             locStr = localeEnum->snext(status)) {
            //
            if (locName == *locStr) {
                found = TRUE;
            }
        }

        // Walk the enumeration again; halfway through, clone it and verify that the
        // clone reports the same count and then yields the same remaining items.
        StringEnumeration *le2 = NULL;
        localeEnum->reset(status);
        int32_t i, count;
        count = localeEnum->count(status);
        for(i = 0; i < count; ++i) {
            if(i == count / 2) {
                le2 = localeEnum->clone();
                if(le2 == NULL || count != le2->count(status)) {
                    errln("ServiceEnumeration.clone() failed");
                    break;
                }
            }
            if(i >= count / 2) {
                locStr = localeEnum->snext(status);
                ls2 = le2->snext(status);
                if(*locStr != *ls2) {
                    errln("ServiceEnumeration.clone() failed for item %d", i);
                }
            } else {
                localeEnum->snext(status);
            }
        }

        delete localeEnum;
        delete le2;

        if (!found) {
            errln("new locale fu_FU not reported as supported locale");
        }

        UnicodeString displayName;
        Collator::getDisplayName(fu_FU, displayName);
        /* The locale display patterns for the ja, ko, and zh locales are different. */
        // "fu", U+FF08, "FU", U+FF09, i.e. the name wrapped in fullwidth parentheses.
        const UChar zh_fuFU_Array[] = { 0x0066, 0x0075, 0xff08, 0x0046, 0x0055, 0xff09, 0 };
        const UnicodeString zh_fuFU(zh_fuFU_Array);
        const Locale& defaultLocale = Locale::getDefault();

        // NOTE(review): this condition requires defaultLocale to equal both the Korean
        // and the Japanese locale, and additionally the Chinese one, so it looks
        // unsatisfiable and the errln below would never fire - confirm the intended logic.
        if (displayName != "fu (FU)" &&
            ((defaultLocale == Locale::getKorean() && defaultLocale == Locale::getJapanese()) && displayName == "fu(FU)") &&
            ((defaultLocale == Locale::getChinese()) && displayName != zh_fuFU)) {
            errln(UnicodeString("found ") + displayName + " for fu_FU");
        }
        Collator::getDisplayName(fu_FU, fu_FU, displayName);
        // NOTE(review): same condition as above, with the same concern.
        if (displayName != "fu (FU)" &&
            ((defaultLocale == Locale::getKorean() && defaultLocale == Locale::getJapanese()) && displayName == "fu(FU)") &&
            ((defaultLocale == Locale::getChinese()) && displayName != zh_fuFU)) {
            errln(UnicodeString("found ") + displayName + " for fu_FU");
        }

        // test ucol_open
        LocalUCollatorPointer fufu(ucol_open("fu_FU_FOO", &status));
        if (fufu.isNull()) {
            errln("could not open fu_FU_FOO with ucol_open");
        } else {
            if (!ucol_equals(fufu.getAlias(), frFR.getAlias())) {
                errln("collator fufu != collator frFR");
            }
        }

        if (!Collator::unregister(key, status)) {
            errln("failed to unregister french collator");
        }
        // !!! note frcoll invalid again, but we're no longer using it

        // other collators should still work ok
        Locale nloc = ncol->getLocale(ULOC_VALID_LOCALE, status);
        if (nloc != fu_FU) {
            errln(UnicodeString("asked for nloc valid locale after close and got") + nloc.getName());
        }
        delete ncol; ncol = NULL;

        if (fufu.isValid()) {
            const char* nlocstr = ucol_getLocaleByType(fufu.getAlias(), ULOC_VALID_LOCALE, &status);
            if (uprv_strcmp(nlocstr, "fu_FU") != 0) {
                errln(UnicodeString("asked for uloc valid locale after close and got ") + nlocstr);
            }
        }

        // After unregistering, fu_FU should again compare equal to the collator
        // created before the registration.
        ncol = Collator::createInstance(fu_FU, status);
        if (*fucol != *ncol) {
            errln("collator after unregister does not match original fu_FU");
        }
        delete uscol; uscol = NULL;
        delete ncol; ncol = NULL;
        delete fucol; fucol = NULL;
    }
#endif
}
// Writes the short definition string for a collator into dst.
//
// The string is assembled in a local buffer from (a) the language, country,
// script, variant and "collation" keyword of the functional-equivalent locale
// and (b) one letter per collator attribute whose value is not UCOL_DEFAULT,
// plus the variable-top value when it is not the default. The assembled string
// is then passed through ucol_normalizeShortDefinitionString into dst.
//
// @param coll      collator to describe; if it has a delegate, the call is
//                  forwarded to the delegate's internalGetShortDefinitionString
// @param locale    locale to describe, or NULL to use the collator's valid locale
// @param dst       output buffer
// @param capacity  size of dst
// @param status    ICU error code; nothing is done if it already indicates failure
// @return the value returned by ucol_normalizeShortDefinitionString (or by the
//         delegate), or 0 if status indicated failure on entry
U_CAPI int32_t U_EXPORT2
ucol_getShortDefinitionString(const UCollator *coll,
                              const char *locale,
                              char *dst,
                              int32_t capacity,
                              UErrorCode *status)
{
    if(U_FAILURE(*status)) return 0;
    if(coll->delegate != NULL) {
        return ((icu::Collator*)coll->delegate)->internalGetShortDefinitionString(locale,dst,capacity,*status);
    }

    // buffer accumulates the un-normalized result; tempbuff and locBuff are scratch.
    char buffer[internalBufferSize];
    uprv_memset(buffer, 0, internalBufferSize*sizeof(char));
    int32_t resultSize = 0;
    char tempbuff[internalBufferSize];
    char locBuff[internalBufferSize];
    // The original cleared `buffer` a second time here; clear the two scratch
    // buffers instead so that none of the three starts out uninitialized.
    uprv_memset(tempbuff, 0, internalBufferSize*sizeof(char));
    uprv_memset(locBuff, 0, internalBufferSize*sizeof(char));
    int32_t elementSize = 0;
    UBool isAvailable = 0;
    // NOTE(review): `s` is initialized but not read anywhere in this function.
    CollatorSpec s;
    ucol_sit_initCollatorSpecs(&s);

    if(!locale) {
        locale = ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, status);
    }
    elementSize = ucol_getFunctionalEquivalent(locBuff, internalBufferSize, "collation", locale, &isAvailable, status);

    if(elementSize) {
        // we should probably canonicalize here...
        elementSize = uloc_getLanguage(locBuff, tempbuff, internalBufferSize, status);
        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, languageArg);
        elementSize = uloc_getCountry(locBuff, tempbuff, internalBufferSize, status);
        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, regionArg);
        elementSize = uloc_getScript(locBuff, tempbuff, internalBufferSize, status);
        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, scriptArg);
        elementSize = uloc_getVariant(locBuff, tempbuff, internalBufferSize, status);
        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, variantArg);
        elementSize = uloc_getKeywordValue(locBuff, "collation", tempbuff, internalBufferSize, status);
        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, keywordArg);
    }

    // One letter for every collator-attribute option that is not at its default.
    int32_t i = 0;
    UColAttributeValue attribute = UCOL_DEFAULT;
    for(i = 0; i < UCOL_SIT_ITEMS_COUNT; i++) {
        if(options[i].action == _processCollatorOption) {
            attribute = ucol_getAttributeOrDefault(coll, (UColAttribute)options[i].attr, status);
            if(attribute != UCOL_DEFAULT) {
                char letter = ucol_sit_attributeValueToLetter(attribute, status);
                appendShortStringElement(&letter, 1,
                    buffer, &resultSize, /*capacity*/internalBufferSize, options[i].optionStart);
            }
        }
    }
    if(coll->variableTopValueisDefault == FALSE) {
        //s.variableTopValue = ucol_getVariableTop(coll, status);
        elementSize = T_CString_integerToString(tempbuff, coll->variableTopValue, 16);
        // The destination is the local `buffer`, so the bound must be its size
        // (as in every other append above), not the caller's dst capacity.
        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, variableTopValArg);
    }

    UParseError parseError;
    return ucol_normalizeShortDefinitionString(buffer, dst, capacity, &parseError, status);
}