//static jstring NativeCollation_getRules(JNIEnv* env, jclass, jint address) { JNIEXPORT jstring JNICALL Java_com_ibm_icu4jni_text_NativeCollation_getRules(JNIEnv* env, jclass, jint address) { int32_t length = 0; const UChar* rules = ucol_getRules(toCollator(address), &length); return env->NewString((const jchar *) rules, length); }
/**
 * Copy constructor.
 *
 * Opens a new underlying search over this object's own copies of the pattern
 * and text, using the collator of the search owned by `that`. If `that` has
 * no underlying search, this object is left without one as well
 * (m_strsrch_ and m_search_ are both NULL).
 *
 * @param that the StringSearch to copy.
 */
StringSearch::StringSearch(const StringSearch &that) :
    SearchIterator(that.m_text_, that.m_breakiterator_),
    m_collator_(),
    m_pattern_(that.m_pattern_)
{
    UErrorCode errorCode = U_ZERO_ERROR;

    if (that.m_strsrch_ != NULL) {
        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                              m_pattern_.length(),
                                              m_text_.getBuffer(),
                                              m_text_.length(),
                                              that.m_strsrch_->collator,
                                              (UBreakIterator *)that.m_breakiterator_,
                                              &errorCode);
    } else {
        // Nothing to copy from: record the failure locally.
        m_strsrch_ = NULL;
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    }

    // m_search_ has been created by the base SearchIterator class; drop that
    // instance, it is replaced below by the one inside m_strsrch_.
    uprv_free(m_search_);
    m_search_ = NULL;

    if (U_FAILURE(errorCode)) {
        return;
    }

    // Cache the rule string and point the member collator at the collator
    // used by the newly opened search.
    int32_t ruleLength;
    const UChar *ruleText = ucol_getRules(m_strsrch_->collator, &ruleLength);
    m_collation_rules_.setTo(ruleText, ruleLength);
    m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
                             &m_collation_rules_);

    m_search_ = m_strsrch_->search;
    m_breakiterator_ = that.m_breakiterator_;
}
/*
 * Produces a new collator derived from pCollator and adjusted for the given
 * comparison options.
 *
 * When this function returns with a U_SUCCESS UErrorCode, the returned
 * collator must be closed by the caller.
 *
 * On error, the return value is undefined.
 */
UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t options, UErrorCode* pErr)
{
    const bool ignoreCase = (options & CompareOptionsIgnoreCase) == CompareOptionsIgnoreCase;
    const bool ignoreNonSpace = (options & CompareOptionsIgnoreNonSpace) == CompareOptionsIgnoreNonSpace;
    const bool ignoreSymbols = (options & CompareOptionsIgnoreSymbols) == CompareOptionsIgnoreSymbols;

    // IgnoreNonSpace takes precedence over IgnoreCase when both are set.
    UColAttributeValue strength = ucol_getStrength(pCollator);
    if (ignoreNonSpace)
    {
        strength = UCOL_PRIMARY;
    }
    else if (ignoreCase)
    {
        strength = UCOL_SECONDARY;
    }

    UCollator* pResult;
    std::vector<UChar> customRules = GetCustomRules(options, strength, ignoreSymbols);

    if (!customRules.empty())
    {
        int32_t localeRulesLength;
        const UChar* localeRules = ucol_getRules(pCollator, &localeRulesLength);

        // Locale rules first, then the custom rules, then one trailing NUL.
        std::vector<UChar> completeRules;
        completeRules.reserve(localeRulesLength + customRules.size() + 1);
        completeRules.insert(completeRules.end(), localeRules, localeRules + localeRulesLength);
        completeRules.insert(completeRules.end(), customRules.begin(), customRules.end());
        completeRules.push_back('\0');

        pResult = ucol_openRules(completeRules.data(), completeRules.size(), UCOL_DEFAULT, strength, NULL, pErr);
    }
    else
    {
        // No extra rules needed: a plain clone is enough.
        pResult = ucol_safeClone(pCollator, nullptr, nullptr, pErr);
    }

    if (ignoreSymbols)
    {
        ucol_setAttribute(pResult, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, pErr);
    }

    ucol_setAttribute(pResult, UCOL_STRENGTH, strength, pErr);

    // Casing differs at the tertiary level. If the strength is below tertiary
    // but case is not being ignored, the case level has to be switched on.
    const bool needsCaseLevel = (strength < UCOL_TERTIARY) && !ignoreCase;
    if (needsCaseLevel)
    {
        ucol_setAttribute(pResult, UCOL_CASE_LEVEL, UCOL_ON, pErr);
    }

    return pResult;
}
/**
 * Copies the collator's rules into a caller-supplied buffer.
 *
 * With delta == UCOL_FULL_RULES the "UCARules" string from the collator's
 * resource bundle is placed first and the collator's own rules are appended;
 * otherwise only the collator's own rules are copied.
 *
 * @param coll      collator to query.
 * @param delta     selects whether the UCA rules are included.
 * @param buffer    destination; may be 0, in which case nothing is copied.
 * @param bufferLen capacity of buffer in UChars.
 * @return the result of u_terminateUChars() for the combined rule length, or
 *         0 if the UCA rules could not be fetched.
 */
U_CAPI int32_t U_EXPORT2
ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) {
    UErrorCode status = U_ZERO_ERROR;
    int32_t tailoringLen = 0;
    int32_t ucaLen = 0;
    const UChar *ucaRules = 0;
    const UChar *tailoring = ucol_getRules(coll, &tailoringLen);

    if (delta == UCOL_FULL_RULES) {
        /* take the UCA rules and append real rules at the end */
        /* UCA rules will be probably coming from the root RB */
        ucaRules = ures_getStringByKey(coll->rb, "UCARules", &ucaLen, &status);
        /*
        UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status);
        UResourceBundle* uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status);
        ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status);
        ures_close(uca);
        ures_close(cresb);
        */
    }

    if (U_FAILURE(status)) {
        return 0;
    }

    if (buffer != 0 && bufferLen > 0) {
        *buffer = 0;

        /* UCA part first, clipped to the buffer capacity */
        if (ucaLen > 0) {
            const int32_t ucaCopy = uprv_min(ucaLen, bufferLen);
            u_memcpy(buffer, ucaRules, ucaCopy);
        }

        /* then the collator's own rules, if any room is left after the UCA part */
        if (tailoringLen > 0 && bufferLen > ucaLen) {
            const int32_t room = bufferLen - ucaLen;
            u_memcpy(buffer + ucaLen, tailoring, uprv_min(tailoringLen, room));
        }
    }

    const int32_t totalLen = tailoringLen + ucaLen;
    return u_terminateUChars(buffer, bufferLen, totalLen, &status);
}
/**
 * Constructs a StringSearch for `pattern` over the text supplied through a
 * CharacterIterator, opening the underlying search with usearch_open() and
 * the name of `locale`.
 *
 * @param pattern   pattern to search for.
 * @param text      iterator handed to the SearchIterator base class.
 * @param locale    locale whose name is passed to usearch_open().
 * @param breakiter break iterator handed to the base class and to the search.
 * @param status    in/out error code; on entry failure nothing is opened and
 *                  m_strsrch_ is set to NULL.
 */
StringSearch::StringSearch(const UnicodeString &pattern, CharacterIterator &text, const Locale &locale, BreakIterator *breakiter, UErrorCode &status) :
    SearchIterator(text, breakiter),
    m_collator_(),
    m_pattern_(pattern)
{
    if (U_FAILURE(status)) {
        m_strsrch_ = NULL;
        return;
    }

    UBreakIterator *ubrk = (UBreakIterator *)breakiter;
    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
                              m_text_.getBuffer(), m_text_.length(),
                              locale.getName(), ubrk, &status);

    // m_search_ has been created by the base SearchIterator class; release it
    // so it can be replaced by the one that lives inside m_strsrch_.
    uprv_free(m_search_);
    m_search_ = NULL;

    if (U_FAILURE(status)) {
        return;
    }

    // Cache the rule string and point the member collator at the collator
    // used by the opened search.
    int32_t ruleLength;
    const UChar *ruleText = ucol_getRules(m_strsrch_->collator, &ruleLength);
    m_collation_rules_.setTo(ruleText, ruleLength);
    m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
                             &m_collation_rules_);

    m_search_ = m_strsrch_->search;
}
void RuleBasedCollator::setRuleStringFromCollator() { int32_t length; const UChar *r = ucol_getRules(ucollator, &length); if (r && length > 0) { // alias the rules string urulestring.setTo(TRUE, r, length); } else { urulestring.truncate(0); // Clear string. } }
/**
 * Constructs a StringSearch for `pattern` over `text`, opening the underlying
 * search with usearch_open() and the name of `locale`.
 *
 * @param pattern   pattern to search for.
 * @param text      text handed to the SearchIterator base class.
 * @param locale    locale whose name is passed to usearch_open().
 * @param breakiter break iterator handed to the base class and to the search.
 * @param status    in/out error code; on entry failure nothing is opened and
 *                  m_strsrch_ is set to NULL.
 */
StringSearch::StringSearch(const UnicodeString &pattern, const UnicodeString &text, const Locale &locale, BreakIterator *breakiter, UErrorCode &status) :
    SearchIterator(text, breakiter),
    m_collator_(),
    m_pattern_(pattern)
{
    if (U_FAILURE(status)) {
        m_strsrch_ = NULL;
        return;
    }

    UBreakIterator *ubrk = (UBreakIterator *)breakiter;
    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
                              m_text_.getBuffer(), m_text_.length(),
                              locale.getName(), ubrk, &status);

    // m_search_ has been created by the base SearchIterator class; release it
    // so it can be replaced by the one that lives inside m_strsrch_.
    uprv_free(m_search_);
    m_search_ = NULL;

    // !!! dlf m_collator_ is an odd beast. Basically it is an aliasing
    // wrapper around the internal collator and rules, which (here) are
    // owned by this stringsearch object. This means 1) its destructor
    // _should not_ delete the ucollator or rules, and 2) changes made
    // to the exposed collator (setStrength etc) _should_ modify the
    // ucollator. Thus the collator is not a copy-on-write alias, and it
    // needs to distinguish itself not merely from 'stand alone' collators
    // but also from copy-on-write ones. It needs additional state, which
    // setUCollator should set.

    if (U_FAILURE(status)) {
        return;
    }

    int32_t ruleLength;
    const UChar *ruleText = ucol_getRules(m_strsrch_->collator, &ruleLength);
    m_collation_rules_.setTo(ruleText, ruleLength);
    m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
                             &m_collation_rules_);

    m_search_ = m_strsrch_->search;
}
/**
 * Get the sets of contractions and expansions defined by the collator. Both
 * the UCA mapping and the collator's own (tailoring) mapping are enumerated.
 * @param coll collator; must not be NULL (U_ILLEGAL_ARGUMENT_ERROR otherwise)
 * @param contractions if not NULL, cleared and then used to hold the contractions
 * @param expansions if not NULL, cleared and then used to hold the expansions
 * @param addPrefixes add the prefix contextual elements to contractions
 * @param status to hold the error code
 *
 * @draft ICU 3.4
 */
U_CAPI void U_EXPORT2
ucol_getContractionsAndExpansions(const UCollator *coll,
                                  USet *contractions,
                                  USet *expansions,
                                  UBool addPrefixes,
                                  UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return;
    }
    if (coll == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    // Start from empty result sets.
    if (contractions != NULL) {
        uset_clear(contractions);
    }
    if (expansions != NULL) {
        uset_clear(expansions);
    }

    // Tokenize the collator's rules; the parser's removeSet is handed to the
    // enumeration context below.
    int32_t ruleLength = 0;
    const UChar *ruleText = ucol_getRules(coll, &ruleLength);
    UColTokenParser parser;
    ucol_tok_initTokenList(&parser, ruleText, ruleLength, coll->UCA,
                           ucol_tok_getRulesFromBundle, NULL, status);

    contContext ctx = { NULL, contractions, expansions, parser.removeSet, addPrefixes, status };

    // Add the UCA contractions
    ctx.coll = coll->UCA;
    utrie_enum(&coll->UCA->mapping, NULL, _processSpecials, &ctx);

    // This is collator specific. Add contractions from a collator
    ctx.coll = coll;
    ctx.removedContractions = NULL;
    utrie_enum(&coll->mapping, NULL, _processSpecials, &ctx);

    ucol_tok_closeTokenList(&parser);
}
/**
 * Resets urulestring and, if `status` indicates success, allocates a new
 * UnicodeString for the rules reported by ucol_getRules() for ucollator.
 *
 * @param status in/out error code; set to U_MEMORY_ALLOCATION_ERROR when the
 *               string object could not be allocated. On entry failure
 *               urulestring is simply left NULL.
 */
void RuleBasedCollator::setRuleStringFromCollator(UErrorCode& status)
{
    urulestring = NULL;
    if (U_FAILURE(status)) {
        return;
    }

    int32_t ruleLength;
    const UChar *ruleText = ucol_getRules(ucollator, &ruleLength);

    // Non-empty rules: alias the rules string. Otherwise use an empty string.
    urulestring = (ruleLength > 0)
        ? new UnicodeString(TRUE, ruleText, ruleLength)
        : new UnicodeString();

    /* test for NULL */
    if (urulestring == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
}
/**
 * Constructs a StringSearch for `pattern` over `text`, opening the underlying
 * search with usearch_openFromCollator() and the UCollator held by `coll`.
 *
 * @param pattern   pattern to search for.
 * @param text      text handed to the SearchIterator base class.
 * @param coll      collator to search with; NULL yields
 *                  U_ILLEGAL_ARGUMENT_ERROR.
 * @param breakiter break iterator handed to the base class and to the search.
 * @param status    in/out error code; on entry failure nothing is opened and
 *                  m_strsrch_ is set to NULL.
 */
StringSearch::StringSearch(const UnicodeString &pattern, const UnicodeString &text, RuleBasedCollator *coll, BreakIterator *breakiter, UErrorCode &status) :
    SearchIterator(text, breakiter),
    m_collator_(),
    m_pattern_(pattern)
{
    if (U_FAILURE(status)) {
        m_strsrch_ = NULL;
        return;
    }
    if (coll == NULL) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        m_strsrch_ = NULL;
        return;
    }

    UBreakIterator *ubrk = (UBreakIterator *)breakiter;
    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                          m_pattern_.length(),
                                          m_text_.getBuffer(),
                                          m_text_.length(),
                                          coll->ucollator, ubrk, &status);

    // m_search_ has been created by the base SearchIterator class; release it
    // so it can be replaced by the one that lives inside m_strsrch_.
    uprv_free(m_search_);
    m_search_ = NULL;

    if (U_FAILURE(status)) {
        return;
    }

    // Cache the rule string and point the member collator at the collator
    // used by the opened search.
    int32_t ruleLength;
    const UChar *ruleText = ucol_getRules(m_strsrch_->collator, &ruleLength);
    m_collation_rules_.setTo(ruleText, ruleLength);
    m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
                             &m_collation_rules_);

    m_search_ = m_strsrch_->search;
}
// operator overloading --------------------------------------------- StringSearch & StringSearch::operator=(const StringSearch &that) { if ((*this) != that) { UErrorCode status = U_ZERO_ERROR; m_text_ = that.m_text_; m_breakiterator_ = that.m_breakiterator_; m_pattern_ = that.m_pattern_; // all m_search_ in the parent class is linked up with m_strsrch_ usearch_close(m_strsrch_); m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), m_pattern_.length(), m_text_.getBuffer(), m_text_.length(), that.m_strsrch_->collator, NULL, &status); int32_t length; const UChar *rules = ucol_getRules(m_strsrch_->collator, &length); m_collation_rules_.setTo(rules, length); m_collator_.setUCollator((UCollator *)m_strsrch_->collator, &m_collation_rules_); m_search_ = m_strsrch_->search; } return *this; }
/**
 * Computes a hash code for this collator from its rule string.
 *
 * @return uhash_hashUCharsN() applied to the rules reported by
 *         ucol_getRules() for ucollator.
 */
int32_t RuleBasedCollator::hashCode() const
{
    int32_t ruleLength;
    const UChar *ruleText = ucol_getRules(ucollator, &ruleLength);
    const int32_t hash = uhash_hashUCharsN(ruleText, ruleLength);
    return hash;
}
/**
 * Constructs a collator for the desired locale, falling back to the root
 * locale when the desired locale's collator cannot be set up.
 *
 * @param desiredLocale locale whose collation is requested.
 * @param status        in/out error code. Set to U_USING_DEFAULT_WARNING when
 *                      the root fallback was used and succeeded with
 *                      U_ZERO_ERROR, and to U_MEMORY_ALLOCATION_ERROR when the
 *                      rule string could not be allocated.
 */
RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status) :
    dataIsOwned(FALSE),
    ucollator(0),
    urulestring(0)
{
    if (U_FAILURE(status)) {
        return;
    }

    /*
    Try to load, in order:
     1. The desired locale's collation.
     2. A fallback of the desired locale.
     3. The default locale's collation.
     4. A fallback of the default locale.
     5. The default collation rules, which contains en_US collation rules.

     To reiterate, we try:
     Specific:
      language+country+variant
      language+country
      language
     Default:
      language+country+variant
      language+country
      language
     Root: (aka DEFAULTRULES)
     Steps 1-5 are handled by the resource bundle fallback mechanism.
     However, in the very improbable situation that no resource bundle
     data exists, step 5 is repeated with hardcoded default rules.
    */

    setUCollator(desiredLocale, status);

    if (U_FAILURE(status)) {
        // Retry with the root locale and flag that a default was used.
        status = U_ZERO_ERROR;
        setUCollator(kRootLocaleName, status);
        if (status == U_ZERO_ERROR) {
            status = U_USING_DEFAULT_WARNING;
        }
    }

    if (U_FAILURE(status)) {
        return;
    }

    int32_t ruleLength;
    const UChar *ruleText = ucol_getRules(ucollator, &ruleLength);

    // Non-empty rules: alias the rules string. Otherwise use an empty string.
    urulestring = (ruleLength > 0)
        ? new UnicodeString(TRUE, ruleText, ruleLength)
        : new UnicodeString();

    /* test for NULL */
    if (urulestring == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    dataIsOwned = TRUE;
    isWriteThroughAlias = FALSE;
}
U_CAPI UBool U_EXPORT2 ucol_equals(const UCollator *source, const UCollator *target) { UErrorCode status = U_ZERO_ERROR; // if pointers are equal, collators are equal if(source == target) { return TRUE; } int32_t i = 0, j = 0; // if any of attributes are different, collators are not equal for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) { if(ucol_getAttribute(source, (UColAttribute)i, &status) != ucol_getAttribute(target, (UColAttribute)i, &status) || U_FAILURE(status)) { return FALSE; } } int32_t sourceRulesLen = 0, targetRulesLen = 0; const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen); const UChar *targetRules = ucol_getRules(target, &targetRulesLen); if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) { // all the attributes are equal and the rules are equal - collators are equal return(TRUE); } // hard part, need to construct tree from rules and see if they yield the same tailoring UBool result = TRUE; UParseError parseError; UColTokenParser sourceParser, targetParser; int32_t sourceListLen = 0, targetListLen = 0; ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, source->UCA, &status); ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, target->UCA, &status); sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status); targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status); if(sourceListLen != targetListLen) { // different number of resets result = FALSE; } else { UColToken *sourceReset = NULL, *targetReset = NULL; UChar *sourceResetString = NULL, *targetResetString = NULL; int32_t sourceStringLen = 0, targetStringLen = 0; for(i = 0; i < sourceListLen; i++) { sourceReset = sourceParser.lh[i].reset; sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF); sourceStringLen = sourceReset->source >> 24; for(j = 0; j < sourceListLen; j++) { targetReset = targetParser.lh[j].reset; targetResetString = 
targetParser.source+(targetReset->source & 0xFFFFFF); targetStringLen = targetReset->source >> 24; if(sourceStringLen == targetStringLen && (u_strncmp(sourceResetString, targetResetString, sourceStringLen) == 0)) { sourceReset = sourceParser.lh[i].first; targetReset = targetParser.lh[j].first; while(sourceReset != NULL && targetReset != NULL) { sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF); sourceStringLen = sourceReset->source >> 24; targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF); targetStringLen = targetReset->source >> 24; if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) { result = FALSE; goto returnResult; } // probably also need to check the expansions if(sourceReset->expansion) { if(!targetReset->expansion) { result = FALSE; goto returnResult; } else { // compare expansions sourceResetString = sourceParser.source+(sourceReset->expansion& 0xFFFFFF); sourceStringLen = sourceReset->expansion >> 24; targetResetString = targetParser.source+(targetReset->expansion & 0xFFFFFF); targetStringLen = targetReset->expansion >> 24; if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) { result = FALSE; goto returnResult; } } } else { if(targetReset->expansion) { result = FALSE; goto returnResult; } } sourceReset = sourceReset->next; targetReset = targetReset->next; } if(sourceReset != targetReset) { // at least one is not NULL // there are more tailored elements in one list result = FALSE; goto returnResult; } break; } } // couldn't find the reset anchor, so the collators are not equal if(j == sourceListLen) { result = FALSE; goto returnResult; } }
/*
 * Produces a new collator derived from pCollator and adjusted for the given
 * comparison options.
 *
 * When this function returns with a U_SUCCESS UErrorCode, the returned
 * collator must be closed by the caller.
 *
 * On error, the return value is undefined.
 */
UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t options, UErrorCode* pErr)
{
    const bool ignoreCase = (options & CompareOptionsIgnoreCase) == CompareOptionsIgnoreCase;
    const bool ignoreNonSpace = (options & CompareOptionsIgnoreNonSpace) == CompareOptionsIgnoreNonSpace;
    const bool ignoreSymbols = (options & CompareOptionsIgnoreSymbols) == CompareOptionsIgnoreSymbols;

    // IgnoreNonSpace takes precedence over IgnoreCase when both are set.
    UColAttributeValue strength = ucol_getStrength(pCollator);
    if (ignoreNonSpace)
    {
        strength = UCOL_PRIMARY;
    }
    else if (ignoreCase)
    {
        strength = UCOL_SECONDARY;
    }

    UCollator* pResult;
    std::vector<UChar> customRules = GetCustomRules(options, strength, ignoreSymbols);

    if (!customRules.empty())
    {
        int32_t localeRulesLength;
        const UChar* localeRules = ucol_getRules(pCollator, &localeRulesLength);

        // Locale rules first, then the custom rules, then one trailing NUL.
        std::vector<UChar> completeRules;
        completeRules.reserve(localeRulesLength + customRules.size() + 1);
        completeRules.insert(completeRules.end(), localeRules, localeRules + localeRulesLength);
        completeRules.insert(completeRules.end(), customRules.begin(), customRules.end());
        completeRules.push_back('\0');

        pResult = ucol_openRules(completeRules.data(), completeRules.size(), UCOL_DEFAULT, strength, NULL, pErr);
    }
    else
    {
        // No extra rules needed: a plain clone is enough.
        pResult = ucol_safeClone(pCollator, nullptr, nullptr, pErr);
    }

    if (ignoreSymbols)
    {
        ucol_setAttribute(pResult, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, pErr);

        // by default, ICU alternate shifted handling only ignores punctuation, but
        // IgnoreSymbols needs symbols and currency as well, so change the "variable top"
        // to include all symbols and currency
#if HAVE_SET_MAX_VARIABLE
        ucol_setMaxVariable(pResult, UCOL_REORDER_CODE_CURRENCY, pErr);
#else
        // 0xfdfc is the last currency character before the first digit character
        // in http://source.icu-project.org/repos/icu/icu/tags/release-52-1/source/data/unidata/FractionalUCA.txt
        const UChar ignoreSymbolsVariableTop[] = { 0xfdfc };
        ucol_setVariableTop(pResult, ignoreSymbolsVariableTop, 1, pErr);
#endif
    }

    ucol_setAttribute(pResult, UCOL_STRENGTH, strength, pErr);

    // Casing differs at the tertiary level. If the strength is below tertiary
    // but case is not being ignored, the case level has to be switched on.
    const bool needsCaseLevel = (strength < UCOL_TERTIARY) && !ignoreCase;
    if (needsCaseLevel)
    {
        ucol_setAttribute(pResult, UCOL_CASE_LEVEL, UCOL_ON, pErr);
    }

    return pResult;
}
/**
 * Returns the rule string of a native collator as a Java string.
 *
 * @param env     JNI environment used to create the result string.
 * @param address handle that toCollator() turns into the collator to query.
 * @return the value produced by env->NewString() for the rule text and length
 *         reported by ucol_getRules().
 */
static jstring NativeCollation_getRules(JNIEnv* env, jclass, jlong address) {
    int32_t ruleLength = 0;
    const UChar* ruleText = ucol_getRules(toCollator(address), &ruleLength);
    jstring javaRules = env->NewString(ruleText, ruleLength);
    return javaRules;
}