Example #1
0
U_CFUNC UBool U_EXPORT2
ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa) {
    const UChar *unfold, *p;
    int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth;

    if(csp->unfold==NULL || s==NULL) {
        return FALSE; /* no reverse case folding data, or no string */
    }
    if(length<=1) {
        /* the string is too short to find any match */
        /*
         * more precise would be:
         * if(!u_strHasMoreChar32Than(s, length, 1))
         * but this does not make much practical difference because
         * a single supplementary code point would just not be found
         */
        return FALSE;
    }

    unfold=csp->unfold;
    unfoldRows=unfold[UCASE_UNFOLD_ROWS];
    unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH];
    unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH];
    unfold+=unfoldRowWidth;

    if(length>unfoldStringWidth) {
        /* the string is too long to find any match */
        return FALSE;
    }

    /* do a binary search for the string */
    start=0;
    limit=unfoldRows;
    while(start<limit) {
        i=(start+limit)/2;
        p=unfold+(i*unfoldRowWidth);
        result=strcmpMax(s, length, p, unfoldStringWidth);

        if(result==0) {
            /* found the string: add each code point, and its case closure */
            UChar32 c;

            for(i=unfoldStringWidth; i<unfoldRowWidth && p[i]!=0;) {
                U16_NEXT_UNSAFE(p, i, c);
                sa->add(sa->set, c);
                ucase_addCaseClosure(csp, c, sa);
            }
            return TRUE;
        } else if(result<0) {
            limit=i;
        } else /* result>0 */ {
            start=i+1;
        }
    }

    return FALSE; /* string not found */
}
Example #2
0
UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
    if (isFrozen() || isBogus()) {
        return *this;
    }
    if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) {
        const UCaseProps *csp = ucase_getSingleton();
        {
            UnicodeSet foldSet(*this);
            UnicodeString str;
            USetAdder sa = {
                foldSet.toUSet(),
                _set_add,
                _set_addRange,
                _set_addString,
                NULL, // don't need remove()
                NULL // don't need removeRange()
            };

            // start with input set to guarantee inclusion
            // USET_CASE: remove strings because the strings will actually be reduced (folded);
            //            therefore, start with no strings and add only those needed
            if (attribute & USET_CASE_INSENSITIVE) {
                foldSet.strings->removeAllElements();
            }

            int32_t n = getRangeCount();
            UChar32 result;
            const UChar *full;
            int32_t locCache = 0;

            for (int32_t i=0; i<n; ++i) {
                UChar32 start = getRangeStart(i);
                UChar32 end   = getRangeEnd(i);

                if (attribute & USET_CASE_INSENSITIVE) {
                    // full case closure
                    for (UChar32 cp=start; cp<=end; ++cp) {
                        ucase_addCaseClosure(csp, cp, &sa);
                    }
                } else {
                    // add case mappings
                    // (does not add long s for regular s, or Kelvin for k, for example)
                    for (UChar32 cp=start; cp<=end; ++cp) {
                        result = ucase_toFullLower(csp, cp, NULL, NULL, &full, "", &locCache);
                        addCaseMapping(foldSet, result, full, str);

                        result = ucase_toFullTitle(csp, cp, NULL, NULL, &full, "", &locCache);
                        addCaseMapping(foldSet, result, full, str);

                        result = ucase_toFullUpper(csp, cp, NULL, NULL, &full, "", &locCache);
                        addCaseMapping(foldSet, result, full, str);

                        result = ucase_toFullFolding(csp, cp, &full, 0);
                        addCaseMapping(foldSet, result, full, str);
                    }
                }
            }
            if (strings != NULL && strings->size() > 0) {
                if (attribute & USET_CASE_INSENSITIVE) {
                    for (int32_t j=0; j<strings->size(); ++j) {
                        str = *(const UnicodeString *) strings->elementAt(j);
                        str.foldCase();
                        if(!ucase_addStringCaseClosure(csp, str.getBuffer(), str.length(), &sa)) {
                            foldSet.add(str); // does not map to code points: add the folded string itself
                        }
                    }
                } else {
                    Locale root("");
#if !UCONFIG_NO_BREAK_ITERATION
                    UErrorCode status = U_ZERO_ERROR;
                    BreakIterator *bi = BreakIterator::createWordInstance(root, status);
                    if (U_SUCCESS(status)) {
#endif
                        const UnicodeString *pStr;

                        for (int32_t j=0; j<strings->size(); ++j) {
                            pStr = (const UnicodeString *) strings->elementAt(j);
                            (str = *pStr).toLower(root);
                            foldSet.add(str);
#if !UCONFIG_NO_BREAK_ITERATION
                            (str = *pStr).toTitle(bi, root);
                            foldSet.add(str);
#endif
                            (str = *pStr).toUpper(root);
                            foldSet.add(str);
                            (str = *pStr).foldCase();
                            foldSet.add(str);
                        }
#if !UCONFIG_NO_BREAK_ITERATION
                    }
                    delete bi;
#endif
                }
            }
            *this = foldSet;
        }
    }
    return *this;
}