/** * Performs a case insensitive string compare between two UTF-8 strings, given a * maximum string length. * * This is a simplified compare, as only the simplified lower/upper case folding * specified by the unicode specs are used. It does not consider character pairs * as they are used in some languages, just simple upper & lower case compares. * * The result is the difference between the mismatching codepoints after they * both have been lower cased. * * If the string encoding is invalid the function will assert (strict builds) * and use RTStrCmp for the remainder of the string. * * @returns < 0 if the first string less than the second string. * @returns 0 if the first string identical to the second string. * @returns > 0 if the first string greater than the second string. * @param psz1 First UTF-8 string. Null is allowed. * @param psz2 Second UTF-8 string. Null is allowed. * @param cchMax Maximum string length */ RTDECL(int) RTStrNICmp(const char *psz1, const char *psz2, size_t cchMax) { if (cchMax == 0) return 0; if (psz1 == psz2) return 0; if (!psz1) return -1; if (!psz2) return 1; for (;;) { /* Get the codepoints */ RTUNICP uc1; size_t cchMax2 = cchMax; int rc = RTStrGetCpNEx(&psz1, &cchMax, &uc1); if (RT_FAILURE(rc)) { AssertRC(rc); psz1--; cchMax++; break; } RTUNICP uc2; rc = RTStrGetCpNEx(&psz2, &cchMax2, &uc2); if (RT_FAILURE(rc)) { AssertRC(rc); psz2--; psz1 -= (cchMax - cchMax2 + 1); /* This can't overflow, can it? */ cchMax = cchMax2 + 1; break; } /* compare */ int iDiff = uc1 - uc2; if (iDiff) { iDiff = RTUniCpToUpper(uc1) != RTUniCpToUpper(uc2); if (iDiff) { iDiff = RTUniCpToLower(uc1) - RTUniCpToLower(uc2); /* lower case diff last! */ if (iDiff) return iDiff; } } /* hit the terminator? */ if (!uc1 || cchMax == 0) return 0; } /* Hit some bad encoding, continue in case insensitive mode. */ return RTStrNCmp(psz1, psz2, cchMax); }
/** * Performs a case insensitive string compare between two UTF-8 strings. * * This is a simplified compare, as only the simplified lower/upper case folding * specified by the unicode specs are used. It does not consider character pairs * as they are used in some languages, just simple upper & lower case compares. * * The result is the difference between the mismatching codepoints after they * both have been lower cased. * * If the string encoding is invalid the function will assert (strict builds) * and use RTStrCmp for the remainder of the string. * * @returns < 0 if the first string less than the second string. * @returns 0 if the first string identical to the second string. * @returns > 0 if the first string greater than the second string. * @param psz1 First UTF-8 string. Null is allowed. * @param psz2 Second UTF-8 string. Null is allowed. */ RTDECL(int) RTStrICmp(const char *psz1, const char *psz2) { if (psz1 == psz2) return 0; if (!psz1) return -1; if (!psz2) return 1; const char *pszStart1 = psz1; for (;;) { /* Get the codepoints */ RTUNICP uc1; int rc = RTStrGetCpEx(&psz1, &uc1); if (RT_FAILURE(rc)) { AssertRC(rc); psz1--; break; } RTUNICP uc2; rc = RTStrGetCpEx(&psz2, &uc2); if (RT_FAILURE(rc)) { AssertRC(rc); psz2--; psz1 = RTStrPrevCp(pszStart1, psz1); break; } /* compare */ int iDiff = uc1 - uc2; if (iDiff) { iDiff = RTUniCpToUpper(uc1) != RTUniCpToUpper(uc2); if (iDiff) { iDiff = RTUniCpToLower(uc1) - RTUniCpToLower(uc2); /* lower case diff last! */ if (iDiff) return iDiff; } } /* hit the terminator? */ if (!uc1) return 0; } /* Hit some bad encoding, continue in case sensitive mode. */ return RTStrCmp(psz1, psz2); }
RTDECL(char *) RTStrToUpper(char *psz) { /* * Loop the code points in the string, converting them one by one. * * ASSUMES that the folded code points have an encoding that is equal or * shorter than the original (this is presently correct). */ const char *pszSrc = psz; char *pszDst = psz; RTUNICP uc; do { int rc = RTStrGetCpEx(&pszSrc, &uc); if (RT_SUCCESS(rc)) { uc = RTUniCpToUpper(uc); pszDst = RTStrPutCp(pszDst, uc); } else { /* bad encoding, just copy it quietly (uc == RTUNICP_INVALID (!= 0)). */ AssertRC(rc); *pszDst++ = pszSrc[-1]; } Assert((uintptr_t)pszDst <= (uintptr_t)pszSrc); } while (uc != 0); return psz; }
/** * Detects a few annoying unicode points with unstable case folding for UTF-8. * * Unicode 4.01, I think, introduces a few codepoints with lower/upper mappings * that has a different length when encoded as UTF-8. This breaks some * assumptions we used to make. Since it's just a handful codepoints, we'll * detect them and ignore them here. The actual case folding functions in * IPRT will of course deal with this in a more robust manner. * * @returns true if problematic, false if not. * @param uc The codepoints. */ static bool isUnevenUtf8FoldingCp(RTUNICP uc) { RTUNICP ucLower = RTUniCpToLower(uc); RTUNICP ucUpper = RTUniCpToUpper(uc); //return RTUniCpCalcUtf8Len(ucLower) != RTUniCpCalcUtf8Len(ucUpper); return false; }
/** * Filter a the filename in the against a filter. * * @returns true if the name matches the filter. * @returns false if the name doesn't match filter. * @param pDir The directory handle. * @param pszName The path to match to the filter. */ static DECLCALLBACK(bool) rtDirFilterWinNtMatchNoWildcards(PRTDIR pDir, const char *pszName) { /* * Walk the string and compare. */ PCRTUNICP pucFilter = pDir->puszFilter; const char *psz = pszName; RTUNICP uc; do { int rc = RTStrGetCpEx(&psz, &uc); AssertRCReturn(rc, false); RTUNICP ucFilter = *pucFilter++; if ( uc != ucFilter && RTUniCpToUpper(uc) != ucFilter) return false; } while (uc); return true; }
/** * Initializes a WinNt like wildcard filter. * * @returns Pointer to the filter function. * @returns NULL if the filter doesn't filter out anything. * @param pDir The directory handle (not yet opened). */ static PFNRTDIRFILTER rtDirFilterWinNtInit(PRTDIR pDir) { /* * Check for the usual * and <"< (*.* in DOS language) patterns. */ if ( (pDir->cchFilter == 1 && pDir->pszFilter[0] == '*') || (pDir->cchFilter == 3 && !memcmp(pDir->pszFilter, "<\".>", 3)) ) return NULL; /* * Uppercase the expression, also do a little optimizations when possible. */ bool fHaveWildcards = false; unsigned iRead = 0; unsigned iWrite = 0; while (iRead < pDir->cucFilter) { RTUNICP uc = pDir->puszFilter[iRead++]; if (uc == '*') { fHaveWildcards = true; /* remove extra stars. */ RTUNICP uc2; while ((uc2 = pDir->puszFilter[iRead + 1]) == '*') iRead++; } else if (uc == '?' || uc == '>' || uc == '<' || uc == '"') fHaveWildcards = true; else uc = RTUniCpToUpper(uc); pDir->puszFilter[iWrite++] = uc; } pDir->puszFilter[iWrite] = 0; pDir->cucFilter = iWrite; return fHaveWildcards ? rtDirFilterWinNtMatch : rtDirFilterWinNtMatchNoWildcards; }
/** * Filter a the filename in the against a filter. * * The rules are as follows: * '?' Matches exactly one char. * '*' Matches zero or more chars. * '<' The dos star, matches zero or more chars except the DOS dot. * '>' The dos question mark, matches one char, but dots and end-of-name eats them. * '"' The dos dot, matches a dot or end-of-name. * * @returns true if the name matches the filter. * @returns false if the name doesn't match filter. * @param iDepth The recursion depth. * @param pszName The path to match to the filter. * @param puszFilter The filter string. */ static bool rtDirFilterWinNtMatchBase(unsigned iDepth, const char *pszName, PCRTUNICP puszFilter) { AssertReturn(iDepth++ < 256, false); /* * Walk the string and match it up char by char. */ RTUNICP uc; do { RTUNICP ucFilter = *puszFilter++; int rc = RTStrGetCpEx(&pszName, &uc); AssertRCReturn(rc, false); switch (ucFilter) { /* Exactly one char. */ case '?': if (!uc) return false; break; /* One char, but the dos dot and end-of-name eats '>' and '<'. */ case '>': /* dos ? */ if (!uc) return rtDirFilterWinNtMatchEon(puszFilter); if (uc == '.') { while ((ucFilter = *puszFilter) == '>' || ucFilter == '<') puszFilter++; if (ucFilter == '"' || ucFilter == '.') /* not 100% sure about the last dot */ ++puszFilter; else /* the does question mark doesn't match '.'s, so backtrack. */ pszName = RTStrPrevCp(NULL, pszName); } break; /* Match a dot or the end-of-name. */ case '"': /* dos '.' */ if (uc != '.') { if (uc) return false; return rtDirFilterWinNtMatchEon(puszFilter); } break; /* zero or more */ case '*': return rtDirFilterWinNtMatchStar(iDepth, uc, pszName, puszFilter); case '<': /* dos '*' */ return rtDirFilterWinNtMatchDosStar(iDepth, uc, pszName, puszFilter); /* uppercased match */ default: { if (RTUniCpToUpper(uc) != ucFilter) return false; break; } } } while (uc); return true; }
/** * Recursive star matching. * * @returns true on match. * @returns false on miss. */ static bool rtDirFilterWinNtMatchStar(unsigned iDepth, RTUNICP uc, const char *pszNext, PCRTUNICP puszFilter) { AssertReturn(iDepth++ < 256, false); /* * Inspect the next filter char(s) until we find something to work on. */ for (;;) { RTUNICP ucFilter = *puszFilter++; switch (ucFilter) { /* * The star expression is the last in the pattern. * Cool, that means we're done! */ case '\0': return true; /* * Just in case (doubt we ever get here), just merge it with the current one. */ case '*': break; /* * Skip a fixed number of chars. * Figure out how many by walking the filter ignoring '*'s. */ case '?': { unsigned cQms = 1; while ((ucFilter = *puszFilter) == '*' || ucFilter == '?') { cQms += ucFilter == '?'; puszFilter++; } do { if (!uc) return false; int rc = RTStrGetCpEx(&pszNext, &uc); AssertRCReturn(rc, false); } while (--cQms > 0); /* done? */ if (!ucFilter) return true; break; } /* * The simple way is to try char by char and match the remaining * expression. If it's trailing we're done. */ case '>': /* dos question mark */ { if (rtDirFilterWinNtMatchEon(puszFilter)) return true; const char *pszStart = pszNext; do { if (rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter)) return true; int rc = RTStrGetCpEx(&pszNext, &uc); AssertRCReturn(rc, false); } while (uc); /* backtrack and do the current char. */ pszNext = RTStrPrevCp(NULL, pszStart); AssertReturn(pszNext, false); return rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter); } /* * This bugger is interesting. * Time for brute force. Iterate the name char by char. */ case '<': { do { if (rtDirFilterWinNtMatchDosStar(iDepth, uc, pszNext, puszFilter)) return true; int rc = RTStrGetCpEx(&pszNext, &uc); AssertRCReturn(rc, false); } while (uc); return false; } /* * This guy matches a '.' or the end of the name. * It's very simple if the rest of the filter expression also matches eon. */ case '"': if (rtDirFilterWinNtMatchEon(puszFilter)) return true; ucFilter = '.'; /* fall thru */ /* * Ok, we've got zero or more characters. * We'll try match starting at each occurrence of this character. */ default: { do { if ( RTUniCpToUpper(uc) == ucFilter && rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter)) return true; int rc = RTStrGetCpEx(&pszNext, &uc); AssertRCReturn(rc, false); } while (uc); return false; } } } /* for (;;) */ /* won't ever get here! */ }
/** * Recursive star matching. * Practically the same as normal star, except that the dos star stops * when hitting the last dot. * * @returns true on match. * @returns false on miss. */ static bool rtDirFilterWinNtMatchDosStar(unsigned iDepth, RTUNICP uc, const char *pszNext, PCRTUNICP puszFilter) { AssertReturn(iDepth++ < 256, false); /* * If there is no dos star, we should work just like the NT star. * Since that's generally faster algorithms, we jump down to there if we can. */ const char *pszDosDot = strrchr(pszNext, '.'); if (!pszDosDot && uc == '.') pszDosDot = pszNext - 1; if (!pszDosDot) return rtDirFilterWinNtMatchStar(iDepth, uc, pszNext, puszFilter); /* * Inspect the next filter char(s) until we find something to work on. */ RTUNICP ucFilter = *puszFilter++; switch (ucFilter) { /* * The star expression is the last in the pattern. * We're fine if the name ends with a dot. */ case '\0': return !pszDosDot[1]; /* * Simplified by brute force. */ case '>': /* dos question mark */ case '?': case '*': case '<': /* dos star */ case '"': /* dos dot */ { puszFilter--; const char *pszStart = pszNext; do { if (rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter)) return true; int rc = RTStrGetCpEx(&pszNext, &uc); AssertRCReturn(rc, false); } while ((intptr_t)pszDosDot - (intptr_t)pszNext >= -1); /* backtrack and do the current char. */ pszNext = RTStrPrevCp(NULL, pszStart); AssertReturn(pszNext, false); return rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter); } /* * Ok, we've got zero or more characters. * We'll try match starting at each occurrence of this character. */ default: { if ( RTUniCpToUpper(uc) == ucFilter && rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter)) return true; do { int rc = RTStrGetCpEx(&pszNext, &uc); AssertRCReturn(rc, false); if ( RTUniCpToUpper(uc) == ucFilter && rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter)) return true; } while ((intptr_t)pszDosDot - (intptr_t)pszNext >= -1); return false; } } /* won't ever get here! */ }
RTDECL(char *) RTStrIStr(const char *pszHaystack, const char *pszNeedle) { /* Any NULL strings means NULL return. (In the RTStrCmp tradition.) */ if (!pszHaystack) return NULL; if (!pszNeedle) return NULL; /* The empty string matches everything. */ if (!*pszNeedle) return (char *)pszHaystack; /* * The search strategy is to pick out the first char of the needle, fold it, * and match it against the haystack code point by code point. When encountering * a matching code point we use RTStrNICmp for the remainder (if any) of the needle. */ const char * const pszNeedleStart = pszNeedle; RTUNICP Cp0; RTStrGetCpEx(&pszNeedle, &Cp0); /* pszNeedle is advanced one code point. */ size_t const cchNeedle = strlen(pszNeedle); size_t const cchNeedleCp0= pszNeedle - pszNeedleStart; RTUNICP const Cp0Lower = RTUniCpToLower(Cp0); RTUNICP const Cp0Upper = RTUniCpToUpper(Cp0); if ( Cp0Lower == Cp0Upper && Cp0Lower == Cp0) { /* Cp0 is not a case sensitive char. */ for (;;) { RTUNICP Cp; RTStrGetCpEx(&pszHaystack, &Cp); if (!Cp) break; if ( Cp == Cp0 && !RTStrNICmp(pszHaystack, pszNeedle, cchNeedle)) return (char *)pszHaystack - cchNeedleCp0; } } else if ( Cp0Lower == Cp0 || Cp0Upper != Cp0) { /* Cp0 is case sensitive */ for (;;) { RTUNICP Cp; RTStrGetCpEx(&pszHaystack, &Cp); if (!Cp) break; if ( ( Cp == Cp0Upper || Cp == Cp0Lower) && !RTStrNICmp(pszHaystack, pszNeedle, cchNeedle)) return (char *)pszHaystack - cchNeedleCp0; } } else { /* Cp0 is case sensitive and folds to two difference chars. (paranoia) */ for (;;) { RTUNICP Cp; RTStrGetCpEx(&pszHaystack, &Cp); if (!Cp) break; if ( ( Cp == Cp0 || Cp == Cp0Upper || Cp == Cp0Lower) && !RTStrNICmp(pszHaystack, pszNeedle, cchNeedle)) return (char *)pszHaystack - cchNeedleCp0; } } return NULL; }
/** * Helper for RTPathCompare() and RTPathStartsWith(). * * @returns similar to strcmp. * @param pszPath1 Path to compare. * @param pszPath2 Path to compare. * @param fLimit Limit the comparison to the length of \a pszPath2 * @internal */ static int rtPathCompare(const char *pszPath1, const char *pszPath2, bool fLimit) { if (pszPath1 == pszPath2) return 0; if (!pszPath1) return -1; if (!pszPath2) return 1; #if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2) PRTUNICP puszPath1; int rc = RTStrToUni(pszPath1, &puszPath1); if (RT_FAILURE(rc)) return -1; PRTUNICP puszPath2; rc = RTStrToUni(pszPath2, &puszPath2); if (RT_FAILURE(rc)) { RTUniFree(puszPath1); return 1; } int iDiff = 0; PRTUNICP puszTmpPath1 = puszPath1; PRTUNICP puszTmpPath2 = puszPath2; for (;;) { register RTUNICP uc1 = *puszTmpPath1; register RTUNICP uc2 = *puszTmpPath2; if (uc1 != uc2) { if (uc1 == '\\') uc1 = '/'; else uc1 = RTUniCpToUpper(uc1); if (uc2 == '\\') uc2 = '/'; else uc2 = RTUniCpToUpper(uc2); if (uc1 != uc2) { iDiff = uc1 > uc2 ? 1 : -1; /* (overflow/underflow paranoia) */ if (fLimit && uc2 == '\0') iDiff = 0; break; } } if (!uc1) break; puszTmpPath1++; puszTmpPath2++; } RTUniFree(puszPath2); RTUniFree(puszPath1); return iDiff; #else if (!fLimit) return strcmp(pszPath1, pszPath2); return strncmp(pszPath1, pszPath2, strlen(pszPath2)); #endif }
static void test2(RTTEST hTest) { RTTestSub(hTest, "UTF-8 upper/lower encoding assumption"); #define CHECK_EQUAL(str1, str2) \ do \ { \ RTTESTI_CHECK(strlen((str1).c_str()) == (str1).length()); \ RTTESTI_CHECK((str1).length() == (str2).length()); \ RTTESTI_CHECK(mymemcmp((str1).c_str(), (str2).c_str(), (str2).length() + 1) == 0); \ } while (0) RTCString strTmp, strExpect; char szDst[16]; /* Some simple ascii stuff. */ strTmp = "abcdefghijklmnopqrstuvwxyz0123456ABCDEFGHIJKLMNOPQRSTUVWXYZ;-+/\\"; strExpect = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456ABCDEFGHIJKLMNOPQRSTUVWXYZ;-+/\\"; strTmp.toUpper(); CHECK_EQUAL(strTmp, strExpect); strTmp.toLower(); strExpect = "abcdefghijklmnopqrstuvwxyz0123456abcdefghijklmnopqrstuvwxyz;-+/\\"; CHECK_EQUAL(strTmp, strExpect); strTmp = "abcdefghijklmnopqrstuvwxyz0123456ABCDEFGHIJKLMNOPQRSTUVWXYZ;-+/\\"; strTmp.toLower(); CHECK_EQUAL(strTmp, strExpect); /* Collect all upper and lower case code points. */ RTCString strLower(""); strLower.reserve(_4M); RTCString strUpper(""); strUpper.reserve(_4M); for (RTUNICP uc = 1; uc <= 0x10fffd; uc++) { /* Unicode 4.01, I think, introduced a few codepoints with lower/upper mappings that aren't up for roundtrips and which case folding has a different UTF-8 length. We'll just skip them here as there are very few: - Dotless small i and dotless capital I folds into ASCII I and i. - The small letter long s folds to ASCII S. - Greek prosgegrammeni folds to iota, which is a letter with both upper and lower case foldings of its own. */ if (uc == 0x131 || uc == 0x130 || uc == 0x17f || 0x1fbe) continue; if (RTUniCpIsLower(uc)) { RTTESTI_CHECK_MSG(uc < 0xd800 || (uc > 0xdfff && uc != 0xfffe && uc != 0xffff), ("%#x\n", uc)); strLower.appendCodePoint(uc); } if (RTUniCpIsUpper(uc)) { RTTESTI_CHECK_MSG(uc < 0xd800 || (uc > 0xdfff && uc != 0xfffe && uc != 0xffff), ("%#x\n", uc)); strUpper.appendCodePoint(uc); } } RTTESTI_CHECK(strlen(strLower.c_str()) == strLower.length()); RTTESTI_CHECK(strlen(strUpper.c_str()) == strUpper.length()); /* Fold each code point in the lower case string and check that it encodes into the same or less number of bytes. */ size_t cch = 0; const char *pszCur = strLower.c_str(); RTCString strUpper2(""); strUpper2.reserve(strLower.length() + 64); for (;;) { RTUNICP ucLower; const char * const pszPrev = pszCur; RTTESTI_CHECK_RC_BREAK(RTStrGetCpEx(&pszCur, &ucLower), VINF_SUCCESS); size_t const cchSrc = pszCur - pszPrev; if (!ucLower) break; RTUNICP const ucUpper = RTUniCpToUpper(ucLower); const char *pszDstEnd = RTStrPutCp(szDst, ucUpper); size_t const cchDst = pszDstEnd - &szDst[0]; RTTESTI_CHECK_MSG(cchSrc >= cchDst, ("ucLower=%#x %u bytes; ucUpper=%#x %u bytes\n", ucLower, cchSrc, ucUpper, cchDst)); cch += cchDst; strUpper2.appendCodePoint(ucUpper); /* roundtrip stability */ RTUNICP const ucUpper2 = RTUniCpToUpper(ucUpper); RTTESTI_CHECK_MSG(ucUpper2 == ucUpper, ("ucUpper2=%#x ucUpper=%#x\n", ucUpper2, ucUpper)); RTUNICP const ucLower2 = RTUniCpToLower(ucUpper); RTTESTI_CHECK_MSG(ucLower2 == ucLower, ("ucLower2=%#x ucLower=%#x\n", ucLower2, ucLower)); RTUNICP const ucUpper3 = RTUniCpToUpper(ucLower2); RTTESTI_CHECK_MSG(ucUpper3 == ucUpper, ("ucUpper3=%#x ucUpper=%#x\n", ucUpper3, ucUpper)); pszDstEnd = RTStrPutCp(szDst, ucLower2); size_t const cchLower2 = pszDstEnd - &szDst[0]; RTTESTI_CHECK_MSG(cchDst == cchLower2, ("ucLower2=%#x %u bytes; ucUpper=%#x %u bytes; ucLower=%#x\n", ucLower2, cchLower2, ucUpper, cchDst, ucLower)); } RTTESTI_CHECK(strlen(strUpper2.c_str()) == strUpper2.length()); RTTESTI_CHECK_MSG(cch == strUpper2.length(), ("cch=%u length()=%u\n", cch, strUpper2.length())); /* the toUpper method shall do the same thing. */ strTmp = strLower; CHECK_EQUAL(strTmp, strLower); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper2); /* Ditto for the upper case string. */ cch = 0; pszCur = strUpper.c_str(); RTCString strLower2(""); strLower2.reserve(strUpper.length() + 64); for (;;) { RTUNICP ucUpper; const char * const pszPrev = pszCur; RTTESTI_CHECK_RC_BREAK(RTStrGetCpEx(&pszCur, &ucUpper), VINF_SUCCESS); size_t const cchSrc = pszCur - pszPrev; if (!ucUpper) break; RTUNICP const ucLower = RTUniCpToLower(ucUpper); const char *pszDstEnd = RTStrPutCp(szDst, ucLower); size_t const cchDst = pszDstEnd - &szDst[0]; RTTESTI_CHECK_MSG(cchSrc >= cchDst, ("ucUpper=%#x %u bytes; ucLower=%#x %u bytes\n", ucUpper, cchSrc, ucLower, cchDst)); cch += cchDst; strLower2.appendCodePoint(ucLower); /* roundtrip stability */ RTUNICP const ucLower2 = RTUniCpToLower(ucLower); RTTESTI_CHECK_MSG(ucLower2 == ucLower, ("ucLower2=%#x ucLower=%#x\n", ucLower2, ucLower)); RTUNICP const ucUpper2 = RTUniCpToUpper(ucLower); RTTESTI_CHECK_MSG(ucUpper2 == ucUpper, ("ucUpper2=%#x ucUpper=%#x\n", ucUpper2, ucUpper)); RTUNICP const ucLower3 = RTUniCpToLower(ucUpper2); RTTESTI_CHECK_MSG(ucLower3 == ucLower, ("ucLower3=%#x ucLower=%#x\n", ucLower3, ucLower)); pszDstEnd = RTStrPutCp(szDst, ucUpper2); size_t const cchUpper2 = pszDstEnd - &szDst[0]; RTTESTI_CHECK_MSG(cchDst == cchUpper2, ("ucUpper2=%#x %u bytes; ucLower=%#x %u bytes\n", ucUpper2, cchUpper2, ucLower, cchDst)); } RTTESTI_CHECK(strlen(strLower2.c_str()) == strLower2.length()); RTTESTI_CHECK_MSG(cch == strLower2.length(), ("cch=%u length()=%u\n", cch, strLower2.length())); strTmp = strUpper; CHECK_EQUAL(strTmp, strUpper); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower2); /* Checks of folding stability when nothing shall change. */ strTmp = strUpper; CHECK_EQUAL(strTmp, strUpper); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper); strTmp = strUpper2; CHECK_EQUAL(strTmp, strUpper2); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper2); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper2); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper2); strTmp = strLower; CHECK_EQUAL(strTmp, strLower); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower); strTmp = strLower2; CHECK_EQUAL(strTmp, strLower2); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower2); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower2); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower2); /* Check folding stability for roundtrips. */ strTmp = strUpper; CHECK_EQUAL(strTmp, strUpper); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower2); strTmp.toUpper(); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower2); strTmp.toUpper(); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower2); strTmp = strLower; CHECK_EQUAL(strTmp, strLower); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper2); strTmp.toLower(); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper2); strTmp.toLower(); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper2); }
RTDECL(int) RTUtf16ICmp(register PCRTUTF16 pwsz1, register PCRTUTF16 pwsz2) { if (pwsz1 == pwsz2) return 0; if (!pwsz1) return -1; if (!pwsz2) return 1; PCRTUTF16 pwsz1Start = pwsz1; /* keep it around in case we have to backtrack on a surrogate pair */ for (;;) { register RTUTF16 wc1 = *pwsz1; register RTUTF16 wc2 = *pwsz2; register int iDiff = wc1 - wc2; if (iDiff) { /* unless they are *both* surrogate pairs, there is no chance they'll be identical. */ if ( wc1 < 0xd800 || wc2 < 0xd800 || wc1 > 0xdfff || wc2 > 0xdfff) { /* simple UCS-2 char */ iDiff = RTUniCpToUpper(wc1) - RTUniCpToUpper(wc2); if (iDiff) iDiff = RTUniCpToLower(wc1) - RTUniCpToLower(wc2); } else { /* a damned pair */ RTUNICP uc1; RTUNICP uc2; if (wc1 >= 0xdc00) { if (pwsz1Start == pwsz1) return iDiff; uc1 = pwsz1[-1]; if (uc1 < 0xd800 || uc1 >= 0xdc00) return iDiff; uc1 = 0x10000 + (((uc1 & 0x3ff) << 10) | (wc1 & 0x3ff)); uc2 = 0x10000 + (((pwsz2[-1] & 0x3ff) << 10) | (wc2 & 0x3ff)); } else { uc1 = *++pwsz1; if (uc1 < 0xdc00 || uc1 >= 0xe000) return iDiff; uc1 = 0x10000 + (((wc1 & 0x3ff) << 10) | (uc1 & 0x3ff)); uc2 = 0x10000 + (((wc2 & 0x3ff) << 10) | (*++pwsz2 & 0x3ff)); } iDiff = RTUniCpToUpper(uc1) - RTUniCpToUpper(uc2); if (iDiff) iDiff = RTUniCpToLower(uc1) - RTUniCpToLower(uc2); /* serious paranoia! */ } if (iDiff) return iDiff; } if (!wc1) return 0; pwsz1++; pwsz2++; } }