U_CFUNC UBool U_EXPORT2 ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa) { const UChar *unfold, *p; int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth; if(csp->unfold==NULL || s==NULL) { return FALSE; /* no reverse case folding data, or no string */ } if(length<=1) { /* the string is too short to find any match */ /* * more precise would be: * if(!u_strHasMoreChar32Than(s, length, 1)) * but this does not make much practical difference because * a single supplementary code point would just not be found */ return FALSE; } unfold=csp->unfold; unfoldRows=unfold[UCASE_UNFOLD_ROWS]; unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH]; unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH]; unfold+=unfoldRowWidth; if(length>unfoldStringWidth) { /* the string is too long to find any match */ return FALSE; } /* do a binary search for the string */ start=0; limit=unfoldRows; while(start<limit) { i=(start+limit)/2; p=unfold+(i*unfoldRowWidth); result=strcmpMax(s, length, p, unfoldStringWidth); if(result==0) { /* found the string: add each code point, and its case closure */ UChar32 c; for(i=unfoldStringWidth; i<unfoldRowWidth && p[i]!=0;) { U16_NEXT_UNSAFE(p, i, c); sa->add(sa->set, c); ucase_addCaseClosure(csp, c, sa); } return TRUE; } else if(result<0) { limit=i; } else /* result>0 */ { start=i+1; } } return FALSE; /* string not found */ }
UnicodeSet& UnicodeSet::closeOver(int32_t attribute) { if (isFrozen() || isBogus()) { return *this; } if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) { const UCaseProps *csp = ucase_getSingleton(); { UnicodeSet foldSet(*this); UnicodeString str; USetAdder sa = { foldSet.toUSet(), _set_add, _set_addRange, _set_addString, NULL, // don't need remove() NULL // don't need removeRange() }; // start with input set to guarantee inclusion // USET_CASE: remove strings because the strings will actually be reduced (folded); // therefore, start with no strings and add only those needed if (attribute & USET_CASE_INSENSITIVE) { foldSet.strings->removeAllElements(); } int32_t n = getRangeCount(); UChar32 result; const UChar *full; int32_t locCache = 0; for (int32_t i=0; i<n; ++i) { UChar32 start = getRangeStart(i); UChar32 end = getRangeEnd(i); if (attribute & USET_CASE_INSENSITIVE) { // full case closure for (UChar32 cp=start; cp<=end; ++cp) { ucase_addCaseClosure(csp, cp, &sa); } } else { // add case mappings // (does not add long s for regular s, or Kelvin for k, for example) for (UChar32 cp=start; cp<=end; ++cp) { result = ucase_toFullLower(csp, cp, NULL, NULL, &full, "", &locCache); addCaseMapping(foldSet, result, full, str); result = ucase_toFullTitle(csp, cp, NULL, NULL, &full, "", &locCache); addCaseMapping(foldSet, result, full, str); result = ucase_toFullUpper(csp, cp, NULL, NULL, &full, "", &locCache); addCaseMapping(foldSet, result, full, str); result = ucase_toFullFolding(csp, cp, &full, 0); addCaseMapping(foldSet, result, full, str); } } } if (strings != NULL && strings->size() > 0) { if (attribute & USET_CASE_INSENSITIVE) { for (int32_t j=0; j<strings->size(); ++j) { str = *(const UnicodeString *) strings->elementAt(j); str.foldCase(); if(!ucase_addStringCaseClosure(csp, str.getBuffer(), str.length(), &sa)) { foldSet.add(str); // does not map to code points: add the folded string itself } } } else { Locale root(""); #if !UCONFIG_NO_BREAK_ITERATION UErrorCode status = U_ZERO_ERROR; BreakIterator *bi = BreakIterator::createWordInstance(root, status); if (U_SUCCESS(status)) { #endif const UnicodeString *pStr; for (int32_t j=0; j<strings->size(); ++j) { pStr = (const UnicodeString *) strings->elementAt(j); (str = *pStr).toLower(root); foldSet.add(str); #if !UCONFIG_NO_BREAK_ITERATION (str = *pStr).toTitle(bi, root); foldSet.add(str); #endif (str = *pStr).toUpper(root); foldSet.add(str); (str = *pStr).foldCase(); foldSet.add(str); } #if !UCONFIG_NO_BREAK_ITERATION } delete bi; #endif } } *this = foldSet; } } return *this; }