Beispiel #1
0
void Collator::createCollator() const
{
    ASSERT(!m_collator);
    UErrorCode status = U_ZERO_ERROR;

    {
        Locker<Mutex> lock(cachedCollatorMutex());
        if (cachedCollator) {
            const char* cachedCollatorLocale = ucol_getLocaleByType(cachedCollator, ULOC_REQUESTED_LOCALE, &status);
            ASSERT(U_SUCCESS(status));
            ASSERT(cachedCollatorLocale);

            UColAttributeValue cachedCollatorLowerFirst = ucol_getAttribute(cachedCollator, UCOL_CASE_FIRST, &status);
            ASSERT(U_SUCCESS(status));

            // FIXME: default locale is never matched, because ucol_getLocaleByType returns the actual one used, not 0.
            if (m_locale && 0 == strcmp(cachedCollatorLocale, m_locale)
                && ((UCOL_LOWER_FIRST == cachedCollatorLowerFirst && m_lowerFirst) || (UCOL_UPPER_FIRST == cachedCollatorLowerFirst && !m_lowerFirst))) {
                m_collator = cachedCollator;
                cachedCollator = 0;
                return;
            }
        }
    }

    m_collator = ucol_open(m_locale, &status);
    if (U_FAILURE(status)) {
        status = U_ZERO_ERROR;
        m_collator = ucol_open("", &status); // Fallback to Unicode Collation Algorithm.
    }
    ASSERT(U_SUCCESS(status));

    ucol_setAttribute(m_collator, UCOL_CASE_FIRST, m_lowerFirst ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST, &status);
    ASSERT(U_SUCCESS(status));
}
Beispiel #2
0
void Target::setTargetString(const UnicodeString *target)
{
    if (charBreakIterator != NULL) {
        ubrk_close(charBreakIterator);
        ucol_closeElements(elements);
    }

    targetString = target;

    if (targetString != NULL) {
        UErrorCode status = U_ZERO_ERROR;

        targetBuffer = targetString->getBuffer();
        targetLength = targetString->length();

        elements = ucol_openElements(coll, target->getBuffer(), target->length(), &status);
        ucol_forceHanImplicit(elements, &status);

        charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status),
                                      targetBuffer, targetLength, &status);
    } else {
        targetBuffer = NULL;
        targetLength = 0;
    }
}
Beispiel #3
0
const Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const {
    const char *result = ucol_getLocaleByType(ucollator, type, &status);
    if(result == NULL) {
        Locale res("");
        res.setToBogus();
        return res;
    } else {
        return Locale(result);
    }
}
Beispiel #4
0
// Collator.actual_locale {{{
static PyObject *
icu_Collator_actual_locale(icu_Collator *self, void *closure) {
    const char *loc = NULL;
    UErrorCode status = U_ZERO_ERROR;

    loc = ucol_getLocaleByType(self->collator, ULOC_ACTUAL_LOCALE, &status);
    if (loc == NULL || U_FAILURE(status)) {
        PyErr_SetString(PyExc_Exception, "Failed to get actual locale"); return NULL;
    }
    return Py_BuildValue("s", loc);
}
Beispiel #5
0
static String HHVM_METHOD(Collator, getLocale, int64_t type) {
  FETCH_COL(data, this_, "");
  data->clearError();
  UErrorCode error = U_ZERO_ERROR;
  auto loc = ucol_getLocaleByType(data->collator(), (ULocDataLocaleType)type,
                                  &error);
  if (U_FAILURE(error)) {
    data->setError(error, "Error getting locale by type");
  }
  return String(loc, CopyString);
}
Beispiel #6
0
// Collator.display_name {{{
static PyObject *
icu_Collator_display_name(icu_Collator *self, void *closure) {
    const char *loc = NULL;
    UErrorCode status = U_ZERO_ERROR;
    UChar dname[400];
    int32_t sz = 0;

    loc = ucol_getLocaleByType(self->collator, ULOC_ACTUAL_LOCALE, &status);
    if (loc == NULL) {
        PyErr_SetString(PyExc_Exception, "Failed to get actual locale"); return NULL;
    }
    sz = ucol_getDisplayName(loc, "en", dname, sizeof(dname), &status);
    if (U_FAILURE(status)) {PyErr_SetString(PyExc_ValueError, u_errorName(status)); return NULL; }

    return icu_to_python(dname, sz);
}
Beispiel #7
0
String c_Collator::t_getlocale(int64_t type /* = 0 */) {
  if (!m_ucoll) {
    raise_error("getlocale called on uninitialized Collator object");
    return "";
  }
  m_errcode.clearError();
  UErrorCode error = U_ZERO_ERROR;
  String ret(
    (char*)ucol_getLocaleByType(m_ucoll, (ULocDataLocaleType)type,
                                &error),
    CopyString);
  if (U_FAILURE(error)) {
    m_errcode.setError(error, "Error getting locale by type");
    return "";
  }
  return ret;
}
Beispiel #8
0
String c_Collator::t_getlocale(int64_t type /* = 0 */) {
  if (!m_ucoll) {
    raise_warning("getlocale called on uninitialized Collator object");
    return "";
  }
  m_errcode.clear();
  String ret(
    (char*)ucol_getLocaleByType(m_ucoll, (ULocDataLocaleType)type,
                                &(m_errcode.code)),
    CopyString);
  if (U_FAILURE(m_errcode.code)) {
    m_errcode.custom_error_message = "Error getting locale by type";
    s_intl_error->m_error.code = m_errcode.code;
    s_intl_error->m_error.custom_error_message =
      m_errcode.custom_error_message;
    return "";
  }
  return ret;
}
Beispiel #9
0
String c_Collator::t_getlocale(int64 type /* = 0 */) {
  INSTANCE_METHOD_INJECTION_BUILTIN(Collator, Collator::getlocale);
  if (!m_ucoll) {
    raise_warning("getlocale called on uninitialized Collator object");
    return "";
  }
  m_errcode.clear();
  String ret(
    (char*)ucol_getLocaleByType(m_ucoll, (ULocDataLocaleType)type,
                                &(m_errcode.code)),
    AttachLiteral);
  if (U_FAILURE(m_errcode.code)) {
    m_errcode.custom_error_message = "Error getting locale by type";
    s_intl_error->m_error.code = m_errcode.code;
    s_intl_error->m_error.custom_error_message =
      m_errcode.custom_error_message;
    return "";
  }
  return ret;
}
Beispiel #10
0
// Collator.display_name {{{
static PyObject *
icu_Collator_display_name(icu_Collator *self, void *closure) {
    const char *loc = NULL;
    UErrorCode status = U_ZERO_ERROR;
    UChar dname[400];
    char buf[100];

    loc = ucol_getLocaleByType(self->collator, ULOC_ACTUAL_LOCALE, &status);
    if (loc == NULL || U_FAILURE(status)) {
        PyErr_SetString(PyExc_Exception, "Failed to get actual locale"); return NULL;
    }
    ucol_getDisplayName(loc, "en", dname, 100, &status);
    if (U_FAILURE(status)) return PyErr_NoMemory();

    u_strToUTF8(buf, 100, NULL, dname, -1, &status);
    if (U_FAILURE(status)) {
        PyErr_SetString(PyExc_Exception, "Failed to convert dname to UTF-8"); return NULL;
    }
    return Py_BuildValue("s", buf);
}
Beispiel #11
0
static UBool simpleSearch(UCollator *coll, const UnicodeString &target, int32_t offset, const UnicodeString &pattern, int32_t &matchStart, int32_t &matchEnd)
{
    UErrorCode      status = U_ZERO_ERROR;
    OrderList       targetOrders(coll, target, offset);
    OrderList       patternOrders(coll, pattern);
    int32_t         targetSize  = targetOrders.size() - 1;
    int32_t         patternSize = patternOrders.size() - 1;
    UBreakIterator *charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status),
                                                  target.getBuffer(), target.length(), &status);

    if (patternSize == 0) {
        // Searching for an empty pattern always fails
        matchStart = matchEnd = -1;
        ubrk_close(charBreakIterator);
        return FALSE;
    }

    matchStart = matchEnd = -1;

    for(int32_t i = 0; i < targetSize; i += 1) {
        if (targetOrders.matchesAt(i, patternOrders)) {
            int32_t start    = targetOrders.getLowOffset(i);
            int32_t maxLimit = targetOrders.getLowOffset(i + patternSize);
            int32_t minLimit = targetOrders.getLowOffset(i + patternSize - 1);

            // if the low and high offsets of the first CE in
            // the match are the same, it means that the match
            // starts in the middle of an expansion - all but
            // the first CE of the expansion will have the offset
            // of the following character.
            if (start == targetOrders.getHighOffset(i)) {
                continue;
            }

            // Make sure match starts on a grapheme boundary
            if (! ubrk_isBoundary(charBreakIterator, start)) {
                continue;
            }

            // If the low and high offsets of the CE after the match
            // are the same, it means that the match ends in the middle
            // of an expansion sequence.
            if (maxLimit == targetOrders.getHighOffset(i + patternSize) &&
                targetOrders.getOrder(i + patternSize) != UCOL_NULLORDER) {
                continue;
            }

            int32_t mend = maxLimit;

            // Find the first grapheme break after the character index
            // of the last CE in the match. If it's after character index
            // that's after the last CE in the match, use that index
            // as the end of the match.
            if (minLimit < maxLimit) {
                // When the last CE's low index is same with its high index, the CE is likely
                // a part of expansion. In this case, the index is located just after the
                // character corresponding to the CEs compared above. If the index is right
                // at the break boundary, move the position to the next boundary will result
                // incorrect match length when there are ignorable characters exist between
                // the position and the next character produces CE(s). See ticket#8482.
                if (minLimit == targetOrders.getHighOffset(i + patternSize - 1) && ubrk_isBoundary(charBreakIterator, minLimit)) {
                    mend = minLimit;
                } else {
                    int32_t nba = ubrk_following(charBreakIterator, minLimit);

                    if (nba >= targetOrders.getHighOffset(i + patternSize - 1)) {
                        mend = nba;
                    }
                }
            }

            if (mend > maxLimit) {
                continue;
            }

            if (! ubrk_isBoundary(charBreakIterator, mend)) {
                continue;
            }

            matchStart = start;
            matchEnd   = mend;

            ubrk_close(charBreakIterator);
            return TRUE;
        }
    }

    ubrk_close(charBreakIterator);
    return FALSE;
}
void CollationServiceTest::TestRegister()
{
#if !UCONFIG_NO_SERVICE
    // register a singleton
    const Locale& FR = Locale::getFrance();
    const Locale& US = Locale::getUS();
    const Locale US_FOO("en", "US", "FOO");

    UErrorCode status = U_ZERO_ERROR;

    Collator* frcol = Collator::createInstance(FR, status);
    Collator* uscol = Collator::createInstance(US, status);
    if(U_FAILURE(status)) {
        errcheckln(status, "Failed to create collators with %s", u_errorName(status));
        delete frcol;
        delete uscol;
        return;
    }

    { // try override en_US collator
        URegistryKey key = Collator::registerInstance(frcol, US, status);

        Collator* ncol = Collator::createInstance(US_FOO, status);
        if (*frcol != *ncol) {
            errln("register of french collator for en_US failed on request for en_US_FOO");
        }
        // ensure original collator's params not touched
        Locale loc = frcol->getLocale(ULOC_REQUESTED_LOCALE, status);
        if (loc != FR) {
          errln(UnicodeString("fr collator's requested locale changed to ") + loc.getName());
        }
        loc = frcol->getLocale(ULOC_VALID_LOCALE, status);
        if (loc != FR) {
          errln(UnicodeString("fr collator's valid locale changed to ") + loc.getName());
        }

        loc = ncol->getLocale(ULOC_REQUESTED_LOCALE, status);
        if (loc != US_FOO) {
            errln(UnicodeString("requested locale for en_US_FOO is not en_US_FOO but ") + loc.getName());
        }
        loc = ncol->getLocale(ULOC_VALID_LOCALE, status);
        if (loc != US) {
            errln(UnicodeString("valid locale for en_US_FOO is not en_US but ") + loc.getName());
        }
        loc = ncol->getLocale(ULOC_ACTUAL_LOCALE, status);
        if (loc != US) {
            errln(UnicodeString("actual locale for en_US_FOO is not en_US but ") + loc.getName());
        }
        delete ncol; ncol = NULL;

        if (!Collator::unregister(key, status)) {
            errln("failed to unregister french collator");
        }
        // !!! frcol pointer is now invalid !!!

        ncol = Collator::createInstance(US, status);
        if (*uscol != *ncol) {
            errln("collator after unregister does not match original");
        }
        delete ncol; ncol = NULL;
    }

    // recreate frcol
    frcol = Collator::createInstance(FR, status);

    LocalUCollatorPointer frFR(ucol_open("fr_FR", &status));

    { // try create collator for new locale
        Locale fu_FU_FOO("fu", "FU", "FOO");
        Locale fu_FU("fu", "FU", "");

        Collator* fucol = Collator::createInstance(fu_FU, status);
        URegistryKey key = Collator::registerInstance(frcol, fu_FU, status);
        Collator* ncol = Collator::createInstance(fu_FU_FOO, status);
        if (*frcol != *ncol) {
            errln("register of fr collator for fu_FU failed");
        }

        UnicodeString locName = fu_FU.getName();
        StringEnumeration* localeEnum = Collator::getAvailableLocales();
        UBool found = FALSE;
        const UnicodeString* locStr, *ls2;
        for (locStr = localeEnum->snext(status);
        !found && locStr != NULL;
        locStr = localeEnum->snext(status)) {
            //
            if (locName == *locStr) {
                found = TRUE;
            }
        }

        StringEnumeration *le2 = NULL;
        localeEnum->reset(status);
        int32_t i, count;
        count = localeEnum->count(status);
        for(i = 0; i < count; ++i) {
            if(i == count / 2) {
                le2 = localeEnum->clone();
                if(le2 == NULL || count != le2->count(status)) {
                    errln("ServiceEnumeration.clone() failed");
                    break;
                }
            }
            if(i >= count / 2) {
                locStr = localeEnum->snext(status);
                ls2 = le2->snext(status);
                if(*locStr != *ls2) {
                    errln("ServiceEnumeration.clone() failed for item %d", i);
                }
            } else {
                localeEnum->snext(status);
            }
        }

        delete localeEnum;
        delete le2;

        if (!found) {
            errln("new locale fu_FU not reported as supported locale");
        }

        UnicodeString displayName;
        Collator::getDisplayName(fu_FU, displayName);
        /* The locale display pattern for the locale ja, ko, and zh are different. */
        const UChar zh_fuFU_Array[] = { 0x0066, 0x0075, 0xff08, 0x0046, 0x0055, 0xff09, 0 };
        const UnicodeString zh_fuFU(zh_fuFU_Array);
        const Locale& defaultLocale = Locale::getDefault();
        if (displayName != "fu (FU)" &&
           ((defaultLocale == Locale::getKorean() && defaultLocale == Locale::getJapanese()) && displayName == "fu(FU)") &&
           ((defaultLocale == Locale::getChinese()) && displayName != zh_fuFU)) {
            errln(UnicodeString("found ") + displayName + " for fu_FU");
        }

        Collator::getDisplayName(fu_FU, fu_FU, displayName);
        if (displayName != "fu (FU)" &&
           ((defaultLocale == Locale::getKorean() && defaultLocale == Locale::getJapanese()) && displayName == "fu(FU)") &&
           ((defaultLocale == Locale::getChinese()) && displayName != zh_fuFU)) {
            errln(UnicodeString("found ") + displayName + " for fu_FU");
        }

        // test ucol_open
        LocalUCollatorPointer fufu(ucol_open("fu_FU_FOO", &status));
        if (fufu.isNull()) {
            errln("could not open fu_FU_FOO with ucol_open");
        } else {
            if (!ucol_equals(fufu.getAlias(), frFR.getAlias())) {
                errln("collator fufu != collator frFR");
            }
        }

        if (!Collator::unregister(key, status)) {
            errln("failed to unregister french collator");
        }
        // !!! note frcoll invalid again, but we're no longer using it

        // other collators should still work ok
        Locale nloc = ncol->getLocale(ULOC_VALID_LOCALE, status);
        if (nloc != fu_FU) {
            errln(UnicodeString("asked for nloc valid locale after close and got") + nloc.getName());
        }
        delete ncol; ncol = NULL;

        if (fufu.isValid()) {
            const char* nlocstr = ucol_getLocaleByType(fufu.getAlias(), ULOC_VALID_LOCALE, &status);
            if (uprv_strcmp(nlocstr, "fu_FU") != 0) {
                errln(UnicodeString("asked for uloc valid locale after close and got ") + nlocstr);
            }
        }

        ncol = Collator::createInstance(fu_FU, status);
        if (*fucol != *ncol) {
            errln("collator after unregister does not match original fu_FU");
        }
        delete uscol; uscol = NULL;
        delete ncol; ncol = NULL;
        delete fucol; fucol = NULL;
    }
#endif
}
Beispiel #13
0
U_CAPI int32_t U_EXPORT2
ucol_getShortDefinitionString(const UCollator *coll,
                              const char *locale,
                              char *dst,
                              int32_t capacity,
                              UErrorCode *status)
{
    if(U_FAILURE(*status)) return 0;
    if(coll->delegate != NULL) {
      return ((icu::Collator*)coll->delegate)->internalGetShortDefinitionString(locale,dst,capacity,*status);
    }
    char buffer[internalBufferSize];
    uprv_memset(buffer, 0, internalBufferSize*sizeof(char));
    int32_t resultSize = 0;
    char tempbuff[internalBufferSize];
    char locBuff[internalBufferSize];
    uprv_memset(buffer, 0, internalBufferSize*sizeof(char));
    int32_t elementSize = 0;
    UBool isAvailable = 0;
    CollatorSpec s;
    ucol_sit_initCollatorSpecs(&s);

    if(!locale) {
        locale = ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, status);
    }
    elementSize = ucol_getFunctionalEquivalent(locBuff, internalBufferSize, "collation", locale, &isAvailable, status);

    if(elementSize) {
        // we should probably canonicalize here...
        elementSize = uloc_getLanguage(locBuff, tempbuff, internalBufferSize, status);
        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, languageArg);
        elementSize = uloc_getCountry(locBuff, tempbuff, internalBufferSize, status);
        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, regionArg);
        elementSize = uloc_getScript(locBuff, tempbuff, internalBufferSize, status);
        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, scriptArg);
        elementSize = uloc_getVariant(locBuff, tempbuff, internalBufferSize, status);
        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, variantArg);
        elementSize = uloc_getKeywordValue(locBuff, "collation", tempbuff, internalBufferSize, status);
        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, /*capacity*/internalBufferSize, keywordArg);
    }

    int32_t i = 0;
    UColAttributeValue attribute = UCOL_DEFAULT;
    for(i = 0; i < UCOL_SIT_ITEMS_COUNT; i++) {
        if(options[i].action == _processCollatorOption) {
            attribute = ucol_getAttributeOrDefault(coll, (UColAttribute)options[i].attr, status);
            if(attribute != UCOL_DEFAULT) {
                char letter = ucol_sit_attributeValueToLetter(attribute, status);
                appendShortStringElement(&letter, 1,
                    buffer, &resultSize, /*capacity*/internalBufferSize, options[i].optionStart);
            }
        }
    }
    if(coll->variableTopValueisDefault == FALSE) {
        //s.variableTopValue = ucol_getVariableTop(coll, status);
        elementSize = T_CString_integerToString(tempbuff, coll->variableTopValue, 16);
        appendShortStringElement(tempbuff, elementSize, buffer, &resultSize, capacity, variableTopValArg);
    }

    UParseError parseError;
    return ucol_normalizeShortDefinitionString(buffer, dst, capacity, &parseError, status);
}