Ejemplo n.º 1
0
//static jstring NativeCollation_getRules(JNIEnv* env, jclass, jint address) {
JNIEXPORT jstring JNICALL
Java_com_ibm_icu4jni_text_NativeCollation_getRules(JNIEnv* env, jclass,
		jint address) {
	int32_t length = 0;
	const UChar* rules = ucol_getRules(toCollator(address), &length);
	return env->NewString((const jchar *) rules, length);
}
Ejemplo n.º 2
0
StringSearch::StringSearch(const StringSearch &that) :
                       SearchIterator(that.m_text_, that.m_breakiterator_),
                       m_collator_(),
                       m_pattern_(that.m_pattern_)
{
    UErrorCode status = U_ZERO_ERROR;
    if (that.m_strsrch_ == NULL) {
        m_strsrch_ = NULL;
        status     = U_ILLEGAL_ARGUMENT_ERROR;
    }
    else {
        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 
                                              m_pattern_.length(), 
                                              m_text_.getBuffer(), 
                                              m_text_.length(), 
                                              that.m_strsrch_->collator, 
                                     (UBreakIterator *)that.m_breakiterator_, 
                                              &status);
    }
    uprv_free(m_search_);
    m_search_ = NULL;

    if (U_SUCCESS(status)) {
        int32_t  length;
        const UChar   *rules = ucol_getRules(m_strsrch_->collator, &length);
        m_collation_rules_.setTo(rules, length);
        m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
                                 &m_collation_rules_);
        // m_search_ has been created by the base SearchIterator class
        m_search_        = m_strsrch_->search;
        m_breakiterator_ = that.m_breakiterator_;
    }
}
Ejemplo n.º 3
0
/*
 * The collator returned by this function is owned by the callee and must be
 * closed when this method returns with a U_SUCCESS UErrorCode.
 *
 * On error, the return value is undefined.
 */
UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t options, UErrorCode* pErr)
{
    UColAttributeValue strength = ucol_getStrength(pCollator);

    bool isIgnoreCase = (options & CompareOptionsIgnoreCase) == CompareOptionsIgnoreCase;
    bool isIgnoreNonSpace = (options & CompareOptionsIgnoreNonSpace) == CompareOptionsIgnoreNonSpace;
    bool isIgnoreSymbols = (options & CompareOptionsIgnoreSymbols) == CompareOptionsIgnoreSymbols;

    if (isIgnoreCase)
    {
        strength = UCOL_SECONDARY;
    }

    if (isIgnoreNonSpace)
    {
        strength = UCOL_PRIMARY;
    }

    UCollator* pClonedCollator;
    std::vector<UChar> customRules = GetCustomRules(options, strength, isIgnoreSymbols);
    if (customRules.empty())
    {
        pClonedCollator = ucol_safeClone(pCollator, nullptr, nullptr, pErr);
    }
    else
    {
        int32_t customRuleLength = customRules.size();

        int32_t localeRulesLength;
        const UChar* localeRules = ucol_getRules(pCollator, &localeRulesLength);

        std::vector<UChar> completeRules(localeRulesLength + customRuleLength + 1, '\0');
        for (int i = 0; i < localeRulesLength; i++)
        {
            completeRules[i] = localeRules[i];
        }
        for (int i = 0; i < customRuleLength; i++)
        {
            completeRules[localeRulesLength + i] = customRules[i];
        }

        pClonedCollator = ucol_openRules(completeRules.data(), completeRules.size(), UCOL_DEFAULT, strength, NULL, pErr);
    }

    if (isIgnoreSymbols)
    {
        ucol_setAttribute(pClonedCollator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, pErr);
    }

    ucol_setAttribute(pClonedCollator, UCOL_STRENGTH, strength, pErr);

    // casing differs at the tertiary level.
    // if strength is less than tertiary, but we are not ignoring case, then we need to flip CASE_LEVEL On
    if (strength < UCOL_TERTIARY && !isIgnoreCase)
    {
        ucol_setAttribute(pClonedCollator, UCOL_CASE_LEVEL, UCOL_ON, pErr);
    }

    return pClonedCollator;
}
U_CAPI int32_t U_EXPORT2
ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) {
    UErrorCode status = U_ZERO_ERROR;
    int32_t len = 0;
    int32_t UCAlen = 0;
    const UChar* ucaRules = 0;
    const UChar *rules = ucol_getRules(coll, &len);
    if(delta == UCOL_FULL_RULES) {
        /* take the UCA rules and append real rules at the end */
        /* UCA rules will be probably coming from the root RB */
        ucaRules = ures_getStringByKey(coll->rb,"UCARules",&UCAlen,&status);
        /*
        UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status);
        UResourceBundle*  uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status);
        ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status);
        ures_close(uca);
        ures_close(cresb);
        */
    }
    if(U_FAILURE(status)) {
        return 0;
    }
    if(buffer!=0 && bufferLen>0){
        *buffer=0;
        if(UCAlen > 0) {
            u_memcpy(buffer, ucaRules, uprv_min(UCAlen, bufferLen));
        }
        if(len > 0 && bufferLen > UCAlen) {
            u_memcpy(buffer+UCAlen, rules, uprv_min(len, bufferLen-UCAlen));
        }
    }
    return u_terminateUChars(buffer, bufferLen, len+UCAlen, &status);
}
Ejemplo n.º 5
0
StringSearch::StringSearch(const UnicodeString     &pattern, 
                                 CharacterIterator &text,
                           const Locale            &locale, 
                                 BreakIterator     *breakiter,
                                 UErrorCode        &status) :
                           SearchIterator(text, breakiter), 
                           m_collator_(),
                           m_pattern_(pattern)
{
    if (U_FAILURE(status)) {
        m_strsrch_ = NULL;
        return;
    }
    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), 
                              m_text_.getBuffer(), m_text_.length(), 
                              locale.getName(), (UBreakIterator *)breakiter, 
                              &status);
    uprv_free(m_search_);
    m_search_ = NULL;

    if (U_SUCCESS(status)) {
              int32_t  length;
        const UChar   *rules = ucol_getRules(m_strsrch_->collator, &length);
        m_collation_rules_.setTo(rules, length);
        m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
                                 &m_collation_rules_);
        // m_search_ has been created by the base SearchIterator class
        m_search_ = m_strsrch_->search;
    }
}
Ejemplo n.º 6
0
void
RuleBasedCollator::setRuleStringFromCollator()
{
    int32_t length;
    const UChar *r = ucol_getRules(ucollator, &length);

    if (r && length > 0) {
        // alias the rules string
        urulestring.setTo(TRUE, r, length);
    }
    else {
        urulestring.truncate(0); // Clear string.
    }
}
Ejemplo n.º 7
0
StringSearch::StringSearch(const UnicodeString &pattern, 
                           const UnicodeString &text,
                           const Locale        &locale,       
                                 BreakIterator *breakiter,
                                 UErrorCode    &status) :
                           SearchIterator(text, breakiter), 
                           m_collator_(),
                           m_pattern_(pattern)
{
    if (U_FAILURE(status)) {
        m_strsrch_ = NULL;
        return;
    }

    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), 
                              m_text_.getBuffer(), m_text_.length(), 
                              locale.getName(), (UBreakIterator *)breakiter, 
                              &status);
    uprv_free(m_search_);
    m_search_ = NULL;

	// !!! dlf m_collator_ is an odd beast.  basically it is an aliasing
	// wrapper around the internal collator and rules, which (here) are
	// owned by this stringsearch object.  this means 1) it's destructor
	// _should not_ delete the ucollator or rules, and 2) changes made
	// to the exposed collator (setStrength etc) _should_ modify the 
	// ucollator.  thus the collator is not a copy-on-write alias, and it
	// needs to distinguish itself not merely from 'stand alone' colators
	// but also from copy-on-write ones.  it needs additional state, which
	// setUCollator should set.

    if (U_SUCCESS(status)) {
              int32_t  length;
        const UChar   *rules = ucol_getRules(m_strsrch_->collator, &length);
        m_collation_rules_.setTo(rules, length);
        m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
                                 &m_collation_rules_);
        // m_search_ has been created by the base SearchIterator class
        m_search_        = m_strsrch_->search;
    }
}
Ejemplo n.º 8
0
/**
 * Get a set containing the expansions defined by the collator. The set includes
 * both the UCA expansions and the expansions defined by the tailoring
 * @param coll collator
 * @param conts the set to hold the result
 * @param addPrefixes add the prefix contextual elements to contractions
 * @param status to hold the error code
 *
 * @draft ICU 3.4
 */
U_CAPI void U_EXPORT2
ucol_getContractionsAndExpansions( const UCollator *coll,
                  USet *contractions,
                  USet *expansions,
                  UBool addPrefixes,
                  UErrorCode *status)
{
    if(U_FAILURE(*status)) {
        return;
    }
    if(coll == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    if(contractions) {
      uset_clear(contractions);
    }
    if(expansions) {
      uset_clear(expansions);
    }
    int32_t rulesLen = 0;
    const UChar* rules = ucol_getRules(coll, &rulesLen);
    UColTokenParser src;
    ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA,
                           ucol_tok_getRulesFromBundle, NULL, status);

    contContext c = { NULL, contractions, expansions, src.removeSet, addPrefixes, status };

    // Add the UCA contractions
    c.coll = coll->UCA;
    utrie_enum(&coll->UCA->mapping, NULL, _processSpecials, &c);

    // This is collator specific. Add contractions from a collator
    c.coll = coll;
    c.removedContractions =  NULL;
    utrie_enum(&coll->mapping, NULL, _processSpecials, &c);
    ucol_tok_closeTokenList(&src);
}
Ejemplo n.º 9
0
void
RuleBasedCollator::setRuleStringFromCollator(UErrorCode& status)
{
  urulestring = NULL;
  if (U_SUCCESS(status))
  {
    int32_t length;
    const UChar *r = ucol_getRules(ucollator, &length);
  
	if (length > 0) {
        // alias the rules string
        urulestring = new UnicodeString(TRUE, r, length);
    }
    else {
        urulestring = new UnicodeString();
    }
    /* test for NULL */
    if (urulestring == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
  }
}
Ejemplo n.º 10
0
StringSearch::StringSearch(const UnicodeString     &pattern, 
                           const UnicodeString     &text,
                                 RuleBasedCollator *coll,       
                                 BreakIterator     *breakiter,
                                 UErrorCode        &status) :
                           SearchIterator(text, breakiter), 
                           m_collator_(),
                           m_pattern_(pattern)
{
    if (U_FAILURE(status)) {
        m_strsrch_ = NULL;
        return;
    }
    if (coll == NULL) {
        status     = U_ILLEGAL_ARGUMENT_ERROR;
        m_strsrch_ = NULL;
        return;
    }
    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 
                                          m_pattern_.length(), 
                                          m_text_.getBuffer(), 
                                          m_text_.length(), coll->ucollator, 
                                          (UBreakIterator *)breakiter, 
                                          &status);
    uprv_free(m_search_);
    m_search_ = NULL;

    if (U_SUCCESS(status)) {
              int32_t  length;
        const UChar   *rules = ucol_getRules(m_strsrch_->collator, &length);
        m_collation_rules_.setTo(rules, length);
        m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
                                 &m_collation_rules_);
        // m_search_ has been created by the base SearchIterator class
        m_search_ = m_strsrch_->search;
    }
}
Ejemplo n.º 11
0
// operator overloading ---------------------------------------------
StringSearch & StringSearch::operator=(const StringSearch &that)
{
    if ((*this) != that) {
        UErrorCode status = U_ZERO_ERROR;
        m_text_          = that.m_text_;
        m_breakiterator_ = that.m_breakiterator_;
        m_pattern_       = that.m_pattern_;
        // all m_search_ in the parent class is linked up with m_strsrch_
        usearch_close(m_strsrch_);
        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 
                                              m_pattern_.length(), 
                                              m_text_.getBuffer(), 
                                              m_text_.length(), 
                                              that.m_strsrch_->collator, 
                                              NULL, &status);
        int32_t  length;
        const UChar   *rules = ucol_getRules(m_strsrch_->collator, &length);
        m_collation_rules_.setTo(rules, length);
        m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
                                 &m_collation_rules_);
        m_search_ = m_strsrch_->search;
    }
    return *this;
}
Ejemplo n.º 12
0
int32_t RuleBasedCollator::hashCode() const
{
    int32_t length;
    const UChar *rules = ucol_getRules(ucollator, &length);
    return uhash_hashUCharsN(rules, length);
}
Ejemplo n.º 13
0
RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale,
                                           UErrorCode& status) :
                                     dataIsOwned(FALSE), ucollator(0), urulestring(0)
{
  if (U_FAILURE(status))
    return;

  /*
  Try to load, in order:
   1. The desired locale's collation.
   2. A fallback of the desired locale.
   3. The default locale's collation.
   4. A fallback of the default locale.
   5. The default collation rules, which contains en_US collation rules.

   To reiterate, we try:
   Specific:
    language+country+variant
    language+country
    language
   Default:
    language+country+variant
    language+country
    language
   Root: (aka DEFAULTRULES)
   steps 1-5 are handled by resource bundle fallback mechanism.
   however, in a very unprobable situation that no resource bundle
   data exists, step 5 is repeated with hardcoded default rules.
  */

  setUCollator(desiredLocale, status);

  if (U_FAILURE(status))
  {
    status = U_ZERO_ERROR;

    setUCollator(kRootLocaleName, status);
    if (status == U_ZERO_ERROR) {
        status = U_USING_DEFAULT_WARNING;
    }
  }

  if (U_SUCCESS(status))
  {
    int32_t length;
    const UChar *r = ucol_getRules(ucollator, &length);
    if (length > 0) {
        // alias the rules string
        urulestring = new UnicodeString(TRUE, r, length);
    }
    else {
        urulestring = new UnicodeString();
    }
    /* test for NULL */
    if (urulestring == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    dataIsOwned = TRUE;
	isWriteThroughAlias = FALSE;
  }

  return;
}
U_CAPI UBool U_EXPORT2
ucol_equals(const UCollator *source, const UCollator *target) {
    UErrorCode status = U_ZERO_ERROR;
    // if pointers are equal, collators are equal
    if(source == target) {
        return TRUE;
    }
    int32_t i = 0, j = 0;
    // if any of attributes are different, collators are not equal
    for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
        if(ucol_getAttribute(source, (UColAttribute)i, &status) != ucol_getAttribute(target, (UColAttribute)i, &status) || U_FAILURE(status)) {
            return FALSE;
        }
    }

    int32_t sourceRulesLen = 0, targetRulesLen = 0;
    const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen);
    const UChar *targetRules = ucol_getRules(target, &targetRulesLen);

    if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) {
        // all the attributes are equal and the rules are equal - collators are equal
        return(TRUE);
    }
    // hard part, need to construct tree from rules and see if they yield the same tailoring
    UBool result = TRUE;
    UParseError parseError;
    UColTokenParser sourceParser, targetParser;
    int32_t sourceListLen = 0, targetListLen = 0;
    ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, source->UCA, &status);
    ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, target->UCA, &status);
    sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status);
    targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status);

    if(sourceListLen != targetListLen) {
        // different number of resets
        result = FALSE;
    } else {
        UColToken *sourceReset = NULL, *targetReset = NULL;
        UChar *sourceResetString = NULL, *targetResetString = NULL;
        int32_t sourceStringLen = 0, targetStringLen = 0;
        for(i = 0; i < sourceListLen; i++) {
            sourceReset = sourceParser.lh[i].reset;
            sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
            sourceStringLen = sourceReset->source >> 24;
            for(j = 0; j < sourceListLen; j++) {
                targetReset = targetParser.lh[j].reset;
                targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
                targetStringLen = targetReset->source >> 24;
                if(sourceStringLen == targetStringLen && (u_strncmp(sourceResetString, targetResetString, sourceStringLen) == 0)) {
                    sourceReset = sourceParser.lh[i].first;
                    targetReset = targetParser.lh[j].first;
                    while(sourceReset != NULL && targetReset != NULL) {
                        sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
                        sourceStringLen = sourceReset->source >> 24;
                        targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
                        targetStringLen = targetReset->source >> 24;
                        if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
                            result = FALSE;
                            goto returnResult;
                        }
                        // probably also need to check the expansions
                        if(sourceReset->expansion) {
                            if(!targetReset->expansion) {
                                result = FALSE;
                                goto returnResult;
                            } else {
                                // compare expansions
                                sourceResetString = sourceParser.source+(sourceReset->expansion& 0xFFFFFF);
                                sourceStringLen = sourceReset->expansion >> 24;
                                targetResetString = targetParser.source+(targetReset->expansion & 0xFFFFFF);
                                targetStringLen = targetReset->expansion >> 24;
                                if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
                                    result = FALSE;
                                    goto returnResult;
                                }
                            }
                        } else {
                            if(targetReset->expansion) {
                                result = FALSE;
                                goto returnResult;
                            }
                        }
                        sourceReset = sourceReset->next;
                        targetReset = targetReset->next;
                    }
                    if(sourceReset != targetReset) { // at least one is not NULL
                        // there are more tailored elements in one list
                        result = FALSE;
                        goto returnResult;
                    }


                    break;
                }
            }
            // couldn't find the reset anchor, so the collators are not equal
            if(j == sourceListLen) {
                result = FALSE;
                goto returnResult;
            }
        }
Ejemplo n.º 15
0
/*
 * The collator returned by this function is owned by the callee and must be
 * closed when this method returns with a U_SUCCESS UErrorCode.
 *
 * On error, the return value is undefined.
 */
UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t options, UErrorCode* pErr)
{
    UColAttributeValue strength = ucol_getStrength(pCollator);

    bool isIgnoreCase = (options & CompareOptionsIgnoreCase) == CompareOptionsIgnoreCase;
    bool isIgnoreNonSpace = (options & CompareOptionsIgnoreNonSpace) == CompareOptionsIgnoreNonSpace;
    bool isIgnoreSymbols = (options & CompareOptionsIgnoreSymbols) == CompareOptionsIgnoreSymbols;

    if (isIgnoreCase)
    {
        strength = UCOL_SECONDARY;
    }

    if (isIgnoreNonSpace)
    {
        strength = UCOL_PRIMARY;
    }

    UCollator* pClonedCollator;
    std::vector<UChar> customRules = GetCustomRules(options, strength, isIgnoreSymbols);
    if (customRules.empty())
    {
        pClonedCollator = ucol_safeClone(pCollator, nullptr, nullptr, pErr);
    }
    else
    {
        int32_t customRuleLength = customRules.size();

        int32_t localeRulesLength;
        const UChar* localeRules = ucol_getRules(pCollator, &localeRulesLength);

        std::vector<UChar> completeRules(localeRulesLength + customRuleLength + 1, '\0');
        for (int i = 0; i < localeRulesLength; i++)
        {
            completeRules[i] = localeRules[i];
        }
        for (int i = 0; i < customRuleLength; i++)
        {
            completeRules[localeRulesLength + i] = customRules[i];
        }

        pClonedCollator = ucol_openRules(completeRules.data(), completeRules.size(), UCOL_DEFAULT, strength, NULL, pErr);
    }

    if (isIgnoreSymbols)
    {
        ucol_setAttribute(pClonedCollator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, pErr);

        // by default, ICU alternate shifted handling only ignores punctuation, but
        // IgnoreSymbols needs symbols and currency as well, so change the "variable top"
        // to include all symbols and currency
#if HAVE_SET_MAX_VARIABLE
        ucol_setMaxVariable(pClonedCollator, UCOL_REORDER_CODE_CURRENCY, pErr);
#else
        // 0xfdfc is the last currency character before the first digit character
        // in http://source.icu-project.org/repos/icu/icu/tags/release-52-1/source/data/unidata/FractionalUCA.txt
        const UChar ignoreSymbolsVariableTop[] = { 0xfdfc };
        ucol_setVariableTop(pClonedCollator, ignoreSymbolsVariableTop, 1, pErr);
#endif
    }

    ucol_setAttribute(pClonedCollator, UCOL_STRENGTH, strength, pErr);

    // casing differs at the tertiary level.
    // if strength is less than tertiary, but we are not ignoring case, then we need to flip CASE_LEVEL On
    if (strength < UCOL_TERTIARY && !isIgnoreCase)
    {
        ucol_setAttribute(pClonedCollator, UCOL_CASE_LEVEL, UCOL_ON, pErr);
    }

    return pClonedCollator;
}
static jstring NativeCollation_getRules(JNIEnv* env, jclass, jlong address) {
    int32_t length = 0;
    const UChar* rules = ucol_getRules(toCollator(address), &length);
    return env->NewString(rules, length);
}