/*
 * Produces a collator derived from pCollator with the given CompareOptions
 * flags applied (strength, alternate handling and case level).
 *
 * When *pErr reports success, the returned collator must eventually be closed.
 * When *pErr reports an error, the returned pointer is undefined.
 */
UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t options, UErrorCode* pErr)
{
    const bool isIgnoreCase = (options & CompareOptionsIgnoreCase) == CompareOptionsIgnoreCase;
    const bool isIgnoreNonSpace = (options & CompareOptionsIgnoreNonSpace) == CompareOptionsIgnoreNonSpace;
    const bool isIgnoreSymbols = (options & CompareOptionsIgnoreSymbols) == CompareOptionsIgnoreSymbols;

    // IgnoreNonSpace takes precedence over IgnoreCase; with neither flag the
    // strength of the source collator is kept.
    UColAttributeValue strength = ucol_getStrength(pCollator);
    if (isIgnoreNonSpace)
    {
        strength = UCOL_PRIMARY;
    }
    else if (isIgnoreCase)
    {
        strength = UCOL_SECONDARY;
    }

    UCollator* pClonedCollator;
    const std::vector<UChar> customRules = GetCustomRules(options, strength, isIgnoreSymbols);

    if (!customRules.empty())
    {
        // Custom rules exist: build "<locale rules><custom rules>\0" and open a
        // brand new collator from the combined rule string.
        int32_t localeRulesLength;
        const UChar* localeRules = ucol_getRules(pCollator, &localeRulesLength);

        std::vector<UChar> completeRules;
        completeRules.reserve(localeRulesLength + customRules.size() + 1);
        completeRules.insert(completeRules.end(), localeRules, localeRules + localeRulesLength);
        completeRules.insert(completeRules.end(), customRules.begin(), customRules.end());
        completeRules.push_back('\0');

        // NOTE(review): the length handed to ucol_openRules counts the trailing
        // NUL element as well - confirm that this is intended.
        pClonedCollator = ucol_openRules(
            completeRules.data(), completeRules.size(), UCOL_DEFAULT, strength, nullptr, pErr);
    }
    else
    {
        // Nothing to add: a plain clone of the source collator is enough.
        pClonedCollator = ucol_safeClone(pCollator, nullptr, nullptr, pErr);
    }

    if (isIgnoreSymbols)
    {
        ucol_setAttribute(pClonedCollator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, pErr);
    }

    ucol_setAttribute(pClonedCollator, UCOL_STRENGTH, strength, pErr);

    // Casing differs at the tertiary level. When the strength is below tertiary
    // but case must still be honoured, the case level has to be switched on.
    if (!isIgnoreCase && strength < UCOL_TERTIARY)
    {
        ucol_setAttribute(pClonedCollator, UCOL_CASE_LEVEL, UCOL_ON, pErr);
    }

    return pClonedCollator;
}
/**
 * Creates a C-API collator for locale `loc` through the registered collator
 * service.
 *
 * Returns 0 without doing anything when `status` is NULL, already holds a
 * failure, or no service is registered.
 *
 * If the service hands back a RuleBasedCollator, its UCollator is either taken
 * over (when the collator owns its data) or cloned. Otherwise a zeroed
 * UCollator is allocated that stores the service's collator as its delegate.
 *
 * @param loc    locale identifier used to build the lookup Locale
 * @param status in/out ICU error code; set to U_MEMORY_ALLOCATION_ERROR when
 *               the delegate wrapper cannot be allocated
 * @return the resulting UCollator, or 0/NULL as described above
 */
UCollator*
Collator::createUCollator(const char *loc, UErrorCode *status)
{
    if (status == NULL || U_FAILURE(*status) || !hasService()) {
        return 0;
    }

    UCollator *result = 0;
    Locale desiredLocale(loc);
    Collator *col = (Collator*)gService->get(desiredLocale, *status);
    // dynamic_cast of a null pointer yields null, so this also covers col == NULL.
    RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(col);

    if (rbc != NULL) {
        if (rbc->dataIsOwned) {
            // Steal the underlying collator instead of copying it.
            result = rbc->ucollator;
            rbc->ucollator = NULL; // to prevent free on delete
        } else {
            result = ucol_safeClone(rbc->ucollator, NULL, NULL, status);
        }
    } else {
        // should go in a function- ucol_initDelegate(delegate)
        // Note: this path is also taken when the service returned no collator,
        // in which case the stored delegate is NULL.
        result = (UCollator *)uprv_malloc(sizeof(UCollator));
        if (result != NULL) {
            uprv_memset(result, 0, sizeof(UCollator));
            result->delegate = col;
            result->freeOnClose = TRUE; // do free on close.
            col = NULL; // to prevent free on delete.
        } else {
            *status = U_MEMORY_ALLOCATION_ERROR;
        }
    }

    // col is NULL here when ownership moved into the delegate wrapper.
    delete col;
    return result;
}
/**
 * JNI entry point: clones the collator referenced by `address` and returns the
 * address of the clone as a jlong.
 *
 * The ICU status of the clone call is passed to maybeThrowIcuException
 * (presumably raising a Java exception on failure - confirm against its
 * definition); the clone pointer is returned in either case.
 */
static jlong NativeCollation_safeClone(JNIEnv* env, jclass, jlong address) {
    jint cloneBufferSize = U_COL_SAFECLONE_BUFFERSIZE;
    UErrorCode icuStatus = U_ZERO_ERROR;

    const UCollator* source = toCollator(address);
    UCollator* clone = ucol_safeClone(source, NULL, &cloneBufferSize, &icuStatus);
    maybeThrowIcuException(env, "ucol_safeClone", icuStatus);

    const uintptr_t handle = reinterpret_cast<uintptr_t>(clone);
    return static_cast<jlong>(handle);
}
/**
 * JNI entry point: clones the collator referenced by `address` and returns the
 * address of the clone as a jint.
 *
 * The ICU status of the clone call is passed to icu4jni_error; the clone
 * pointer is returned in either case.
 *
 * NOTE(review): the pointer is narrowed to jint, which would drop bits where
 * pointers are wider than jint - confirm the targets this is built for.
 */
static jint NativeCollation_safeClone(JNIEnv* env, jclass, jint address) {
    jint cloneBufferSize = U_COL_SAFECLONE_BUFFERSIZE;
    UErrorCode icuStatus = U_ZERO_ERROR;

    const UCollator* source = toCollator(address);
    UCollator* clone = ucol_safeClone(source, NULL, &cloneBufferSize, &icuStatus);
    icu4jni_error(env, icuStatus);

    const uintptr_t handle = reinterpret_cast<uintptr_t>(clone);
    return static_cast<jint>(handle);
}
/*
 * Forwarding wrapper around ucol_safeClone: every argument is passed through
 * unchanged and the clone (or whatever ucol_safeClone returns) is handed back.
 */
UCollator* __hs_ucol_safeClone(const UCollator *coll, void *stackBuffer,
                               int32_t *pBufferSize, UErrorCode *status)
{
    UCollator *clone = ucol_safeClone(coll, stackBuffer, pBufferSize, status);

    return clone;
}
void RuleBasedCollator::checkOwned() { if (!(dataIsOwned || isWriteThroughAlias)) { UErrorCode status = U_ZERO_ERROR; ucollator = ucol_safeClone(ucollator, NULL, NULL, &status); setRuleStringFromCollator(); dataIsOwned = TRUE; isWriteThroughAlias = FALSE; } }
static char* collator_clone(char* obj) { UErrorCode status = U_ZERO_ERROR; int32_t size = U_COL_SAFECLONE_BUFFERSIZE; obj = (char*) ucol_safeClone( (UCollator*) obj, NULL, &size, &status ); if(U_FAILURE(status)) { return NULL; } return obj; }
/**
 * Creates an independent copy of this collator.
 *
 * The underlying UCollator is cloned with ucol_safeClone and the rule string
 * is copied. The returned object is marked as owning its data and as not
 * being a write-through alias.
 *
 * @return the new collator, or NULL when the clone fails or an allocation
 *         returns NULL. In the allocation-failure cases everything created so
 *         far is released, so nothing leaks.
 */
Collator* RuleBasedCollator::safeClone(void)
{
    UErrorCode intStatus = U_ZERO_ERROR;
    int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
    UCollator *ucol = ucol_safeClone(ucollator, NULL, &buffersize, &intStatus);
    if (U_FAILURE(intStatus)) {
        return NULL;
    }

    // The allocations below are checked for NULL before use (assumes the
    // ICU-style non-throwing operator new - TODO confirm for this build).
    UnicodeString *r = new UnicodeString(*urulestring);
    if (r == NULL) {
        ucol_close(ucol);
        return NULL;
    }

    RuleBasedCollator *result = new RuleBasedCollator(ucol, r);
    if (result == NULL) {
        // The constructor never ran, so ucol and r are still ours to release.
        delete r;
        ucol_close(ucol);
        return NULL;
    }

    result->dataIsOwned = TRUE;
    result->isWriteThroughAlias = FALSE;
    return result;
}
// Initializer for Matcher objects.
//
// Expects five positional arguments: items, collator, level1, level2, level3.
//   - collator must be a capsule; the UCollator it carries is cloned into
//     self->collator (cloning is needed as collators are not thread safe).
//   - items is read through PySequence_Fast; every element is converted with
//     python_to_icu and stored in self->items / self->item_lengths.
//   - level1..level3 are converted with python_to_icu as well.
//
// Returns 0 on success and -1 on failure. If a Python error is pending once
// the item setup has been attempted, free_matcher(self) is called before
// returning.
static int
Matcher_init(Matcher *self, PyObject *args, PyObject *kwds)
{
    PyObject *items = NULL, *collator = NULL;
    PyObject *level1 = NULL, *level2 = NULL, *level3 = NULL;
    PyObject *fast_items = NULL, *entry = NULL;
    UCollator *source = NULL;
    UErrorCode status = U_ZERO_ERROR;
    int32_t idx = 0;

    if (!PyArg_ParseTuple(args, "OOOOO", &items, &collator, &level1, &level2, &level3)) {
        return -1;
    }

    // Clone the passed in collator (cloning is needed as collators are not thread safe)
    if (!PyCapsule_CheckExact(collator)) {
        PyErr_SetString(PyExc_TypeError, "Collator must be a capsule");
        return -1;
    }
    source = (UCollator*)PyCapsule_GetPointer(collator, NULL);
    if (source == NULL) {
        return -1;
    }
    self->collator = ucol_safeClone(source, NULL, NULL, &status);
    if (U_FAILURE(status)) {
        self->collator = NULL;
        PyErr_SetString(PyExc_ValueError, u_errorName(status));
        return -1;
    }

    fast_items = PySequence_Fast(items, "Must pass in two sequence objects");
    if (fast_items == NULL) {
        goto end;
    }

    // Allocate the per-item arrays and convert the three level strings before
    // validating any of them, so the checks below see the complete state.
    self->item_count = (uint32_t)PySequence_Size(items);
    self->items = (UChar**)calloc(self->item_count, sizeof(UChar*));
    self->item_lengths = (int32_t*)calloc(self->item_count, sizeof(int32_t));
    self->level1 = python_to_icu(level1, NULL);
    self->level2 = python_to_icu(level2, NULL);
    self->level3 = python_to_icu(level3, NULL);

    if (self->items == NULL || self->item_lengths == NULL) {
        PyErr_NoMemory();
        goto end;
    }
    // presumably python_to_icu has already set a Python error when it returns
    // NULL - verify against its definition
    if (self->level1 == NULL || self->level2 == NULL || self->level3 == NULL) {
        goto end;
    }

    idx = 0;
    while (idx < (int32_t)self->item_count) {
        entry = PySequence_Fast_GET_ITEM(fast_items, idx);
        self->items[idx] = python_to_icu(entry, &self->item_lengths[idx]);
        if (self->items[idx] == NULL) {
            PyErr_NoMemory();
            goto end;
        }
        idx++;
    }

end:
    Py_XDECREF(fast_items);
    if (PyErr_Occurred()) {
        free_matcher(self);
    }
    if (PyErr_Occurred()) {
        return -1;
    }
    return 0;
}
/**
 * Creates an independent copy of this collator.
 *
 * The underlying UCollator is cloned with ucol_safeClone and installed in a
 * freshly constructed RuleBasedCollator, which is marked as owning its data
 * and as not being a write-through alias. The rule string of the *new* object
 * is then derived from the collator it now holds.
 *
 * @return the new collator, or NULL when the clone fails or the allocation
 *         returns NULL. In the latter case the cloned UCollator is closed
 *         again so it does not leak.
 */
Collator* RuleBasedCollator::safeClone(void)
{
    UErrorCode intStatus = U_ZERO_ERROR;
    int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
    UCollator *ucol = ucol_safeClone(ucollator, NULL, &buffersize, &intStatus);
    if (U_FAILURE(intStatus)) {
        return NULL;
    }

    RuleBasedCollator *result = new RuleBasedCollator();
    // Null pointer check
    if (result == NULL) {
        // Nobody took ownership of the clone, so release it here.
        ucol_close(ucol);
        return NULL;
    }

    result->ucollator = ucol;
    result->dataIsOwned = TRUE;
    result->isWriteThroughAlias = FALSE;
    // Must be invoked on the clone: calling it on `this` would refresh the
    // source object's rule string and leave the clone's untouched.
    result->setRuleStringFromCollator();

    return result;
}
/**
 * Creates a C-API collator for locale `loc` through the registered collator
 * service.
 *
 * Returns 0 without doing anything when `status` is NULL, already holds a
 * failure, or no service is registered. A result is only produced when the
 * service hands back an object whose dynamic class ID is that of
 * RuleBasedCollator: its UCollator is taken over when the collator owns its
 * data, and cloned otherwise. The collator obtained from the service is
 * deleted before returning.
 *
 * @param loc    locale identifier used to build the lookup Locale
 * @param status in/out ICU error code
 * @return the resulting UCollator, or 0 when none could be produced
 */
UCollator*
Collator::createUCollator(const char *loc, UErrorCode *status)
{
    UCollator *result = 0;
    if (status == NULL || U_FAILURE(*status) || !hasService()) {
        return result;
    }

    Locale desiredLocale(loc);
    Collator *col = (Collator*)gService->get(desiredLocale, *status);

    if (col != NULL && col->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {
        RuleBasedCollator *rbc = (RuleBasedCollator *)col;
        if (rbc->dataIsOwned) {
            // Steal the underlying collator instead of copying it.
            result = rbc->ucollator;
            rbc->ucollator = NULL; // to prevent free on delete
        } else {
            result = ucol_safeClone(rbc->ucollator, NULL, NULL, status);
        }
    }

    delete col;
    return result;
}
// aliasing, not write-through RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that) { if (this != &that) { if (dataIsOwned) { ucol_close(ucollator); } urulestring.truncate(0); // empty the rule string dataIsOwned = TRUE; isWriteThroughAlias = FALSE; UErrorCode intStatus = U_ZERO_ERROR; int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE; ucollator = ucol_safeClone(that.ucollator, NULL, &buffersize, &intStatus); if (U_SUCCESS(intStatus)) { setRuleStringFromCollator(); } } return *this; }
// Collator.clone {{{ static PyObject* icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs) { UCollator *collator; UErrorCode status = U_ZERO_ERROR; int32_t bufsize = -1; icu_Collator *clone; collator = ucol_safeClone(self->collator, NULL, &bufsize, &status); if (collator == NULL || U_FAILURE(status)) { PyErr_SetString(PyExc_Exception, "Failed to create collator."); return NULL; } clone = PyObject_New(icu_Collator, &icu_CollatorType); if (clone == NULL) return PyErr_NoMemory(); clone->collator = collator; clone->contractions = NULL; return (PyObject*) clone; } // }}}
/**
 * Builds the collation data derived from `collator`.
 *
 * Two things are computed:
 *  1. ceToCharsStartingWith: for every tested character or string (assigned,
 *     non-"other" characters plus the collator's contractions and expansions,
 *     minus the implicitly handled Han/Jamo/Hangul ranges), the string is
 *     stored under the first collation element of its CE list.
 *  2. jamoLimits, minHan and maxHan: taken from the collation elements of a
 *     few Jamo code points and of the Unified_Ideograph range boundaries.
 *
 * @param collator the collator to analyse; used directly unless
 *                 CLONE_COLLATOR is defined, in which case it is cloned
 * @param status   in/out ICU error code. On failure the constructor returns
 *                 early and the object is only partially initialised.
 */
CollData::CollData(UCollator *collator, UErrorCode &status)
    : coll(NULL), ceToCharsStartingWith(NULL)
{
    // [:c:] == [[:cn:][:cc:][:co:][:cf:][:cs:]]
    // i.e. other, control, private use, format, surrogate
    U_STRING_DECL(test_pattern, "[[:assigned:]-[:c:]]", 20);
    U_STRING_INIT(test_pattern, "[[:assigned:]-[:c:]]", 20);
    USet *charsToTest = uset_openPattern(test_pattern, 20, &status);

    // Han ext. A, Han, Jamo, Hangul, Han Ext. B
    // i.e. all the characters we handle implicitly
    U_STRING_DECL(remove_pattern, "[[\\u3400-\\u9FFF][\\u1100-\\u11F9][\\uAC00-\\uD7AF][\\U00020000-\\U0002A6DF]]", 70);
    U_STRING_INIT(remove_pattern, "[[\\u3400-\\u9FFF][\\u1100-\\u11F9][\\uAC00-\\uD7AF][\\U00020000-\\U0002A6DF]]", 70);
    USet *charsToRemove = uset_openPattern(remove_pattern, 70, &status);

    if (U_FAILURE(status)) {
        // One of the two sets may have been opened before the failure;
        // release whatever exists so the early return does not leak.
        if (charsToRemove != NULL) {
            uset_close(charsToRemove);
        }
        if (charsToTest != NULL) {
            uset_close(charsToTest);
        }
        return;
    }

    USet *expansions   = uset_openEmpty();
    USet *contractions = uset_openEmpty();
    int32_t itemCount;

    ceToCharsStartingWith = new CEToStringsMap(status);

    // Guard the dereferences further down (assumes the ICU-style non-throwing
    // operator new that returns NULL - TODO confirm for this build).
    if (ceToCharsStartingWith == NULL && U_SUCCESS(status)) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }

    if (U_FAILURE(status)) {
        goto bail;
    }

#ifdef CLONE_COLLATOR
    coll = ucol_safeClone(collator, NULL, NULL, &status);

    if (U_FAILURE(status)) {
        goto bail;
    }
#else
    coll = collator;
#endif

    ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status);

    uset_addAll(charsToTest, contractions);
    uset_addAll(charsToTest, expansions);
    uset_removeAll(charsToTest, charsToRemove);

    itemCount = uset_getItemCount(charsToTest);
    for(int32_t item = 0; item < itemCount; item += 1) {
        UChar32 start = 0, end = 0;
        UChar buffer[16];
        int32_t len = uset_getItem(charsToTest, item, &start, &end,
                                   buffer, 16, &status);

        if (len == 0) {
            // The item is a code point range: handle each code point on its own.
            for (UChar32 ch = start; ch <= end; ch += 1) {
                UnicodeString *st = new UnicodeString(ch);

                if (st == NULL) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    break;
                }

                CEList *ceList = new CEList(coll, *st, status);

                if (ceList == NULL) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    delete st;
                    break;
                }

                ceToCharsStartingWith->put(ceList->get(0), st, status);

                delete ceList;
                delete st;

                // Do not keep walking the range once something has failed.
                if (U_FAILURE(status)) {
                    break;
                }
            }
        } else if (len > 0) {
            // The item is a string of `len` code units held in buffer.
            UnicodeString *st = new UnicodeString(buffer, len);

            if (st == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                break;
            }

            CEList *ceList = new CEList(coll, *st, status);

            if (ceList == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                delete st;
                break;
            }

            ceToCharsStartingWith->put(ceList->get(0), st, status);

            delete ceList;
            delete st;
        } else {
            // shouldn't happen...
        }

        if (U_FAILURE(status)) {
             break;
        }
    }

bail:
    uset_close(contractions);
    uset_close(expansions);
    uset_close(charsToRemove);
    uset_close(charsToTest);

    if (U_FAILURE(status)) {
        return;
    }

    UnicodeSet hanRanges(UNICODE_STRING_SIMPLE("[:Unified_Ideograph:]"), status);
    if (U_FAILURE(status)) {
        return;
    }
    // Collect the first and last code point of every ideograph range.
    UnicodeSetIterator hanIter(hanRanges);
    UnicodeString hanString;
    while(hanIter.nextRange()) {
        hanString.append(hanIter.getCodepoint());
        hanString.append(hanIter.getCodepointEnd());
    }
    // TODO: Why U+11FF? The old code had an outdated UCOL_LAST_T_JAMO=0x11F9,
    // but as of Unicode 6.3 the 11xx block is filled,
    // and there are also more Jamo T at U+D7CB..U+D7FB.
    // Maybe use [:HST=T:] and look for the end of the last range?
    // Maybe use script boundary mappings instead of this code??
    UChar  jamoRanges[] = {Hangul::JAMO_L_BASE, Hangul::JAMO_V_BASE, Hangul::JAMO_T_BASE + 1, 0x11FF};
    UnicodeString jamoString(FALSE, jamoRanges, UPRV_LENGTHOF(jamoRanges));
    CEList hanList(coll, hanString, status);
    CEList jamoList(coll, jamoString, status);
    int32_t j = 0;

    if (U_FAILURE(status)) {
        return;
    }

    // NOTE(review): j is not bounded against the capacity of jamoLimits -
    // confirm the four jamo code points never yield more non-continuation
    // collation elements than the array can hold.
    for (int32_t c = 0; c < jamoList.size(); c += 1) {
        uint32_t jce = jamoList[c];

        if (! isContinuation(jce)) {
            jamoLimits[j++] = jce;
        }
    }

    jamoLimits[3] += (1 << UCOL_PRIMARYORDERSHIFT);

    minHan = 0xFFFFFFFF;
    maxHan = 0;

    // hanString holds (start, end) pairs; only every second CE is inspected.
    for(int32_t h = 0; h < hanList.size(); h += 2) {
        uint32_t han = (uint32_t) hanList[h];

        if (han < minHan) {
            minHan = han;
        }

        if (han > maxHan) {
            maxHan = han;
        }
    }

    maxHan += (1 << UCOL_PRIMARYORDERSHIFT);
}
/*
 * Produces a collator derived from pCollator with the given CompareOptions
 * flags applied (strength, alternate handling, variable top and case level).
 *
 * When *pErr reports success, the returned collator must eventually be closed.
 * When *pErr reports an error, the returned pointer is undefined.
 */
UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t options, UErrorCode* pErr)
{
    const bool isIgnoreCase = (options & CompareOptionsIgnoreCase) == CompareOptionsIgnoreCase;
    const bool isIgnoreNonSpace = (options & CompareOptionsIgnoreNonSpace) == CompareOptionsIgnoreNonSpace;
    const bool isIgnoreSymbols = (options & CompareOptionsIgnoreSymbols) == CompareOptionsIgnoreSymbols;

    // IgnoreNonSpace takes precedence over IgnoreCase; with neither flag the
    // strength of the source collator is kept.
    UColAttributeValue strength = ucol_getStrength(pCollator);
    if (isIgnoreNonSpace)
    {
        strength = UCOL_PRIMARY;
    }
    else if (isIgnoreCase)
    {
        strength = UCOL_SECONDARY;
    }

    UCollator* pClonedCollator;
    const std::vector<UChar> customRules = GetCustomRules(options, strength, isIgnoreSymbols);

    if (!customRules.empty())
    {
        // Custom rules exist: build "<locale rules><custom rules>\0" and open a
        // brand new collator from the combined rule string.
        int32_t localeRulesLength;
        const UChar* localeRules = ucol_getRules(pCollator, &localeRulesLength);

        std::vector<UChar> completeRules;
        completeRules.reserve(localeRulesLength + customRules.size() + 1);
        completeRules.insert(completeRules.end(), localeRules, localeRules + localeRulesLength);
        completeRules.insert(completeRules.end(), customRules.begin(), customRules.end());
        completeRules.push_back('\0');

        // NOTE(review): the length handed to ucol_openRules counts the trailing
        // NUL element as well - confirm that this is intended.
        pClonedCollator = ucol_openRules(
            completeRules.data(), completeRules.size(), UCOL_DEFAULT, strength, nullptr, pErr);
    }
    else
    {
        // Nothing to add: a plain clone of the source collator is enough.
        pClonedCollator = ucol_safeClone(pCollator, nullptr, nullptr, pErr);
    }

    if (isIgnoreSymbols)
    {
        ucol_setAttribute(pClonedCollator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, pErr);

        // by default, ICU alternate shifted handling only ignores punctuation, but
        // IgnoreSymbols needs symbols and currency as well, so change the "variable top"
        // to include all symbols and currency
#if HAVE_SET_MAX_VARIABLE
        ucol_setMaxVariable(pClonedCollator, UCOL_REORDER_CODE_CURRENCY, pErr);
#else
        // 0xfdfc is the last currency character before the first digit character
        // in http://source.icu-project.org/repos/icu/icu/tags/release-52-1/source/data/unidata/FractionalUCA.txt
        const UChar ignoreSymbolsVariableTop[] = { 0xfdfc };
        ucol_setVariableTop(pClonedCollator, ignoreSymbolsVariableTop, 1, pErr);
#endif
    }

    ucol_setAttribute(pClonedCollator, UCOL_STRENGTH, strength, pErr);

    // Casing differs at the tertiary level. When the strength is below tertiary
    // but case must still be honoured, the case level has to be switched on.
    if (!isIgnoreCase && strength < UCOL_TERTIARY)
    {
        ucol_setAttribute(pClonedCollator, UCOL_CASE_LEVEL, UCOL_ON, pErr);
    }

    return pClonedCollator;
}