/**
 * Case insensitive comparison of two UTF-8 strings.
 *
 * Only the simple one-to-one upper/lower case folding offered by
 * RTUniCpToUpper and RTUniCpToLower is applied; multi code point sequences
 * get no special treatment.
 *
 * When two code points differ under both the upper and the lower case
 * folding, the result is the difference between their lower cased values.
 *
 * If decoding either string fails, the function asserts (AssertRC) and hands
 * the remainder of both strings to the case sensitive RTStrCmp.
 *
 * @returns < 0 if the first string is less than the second string.
 * @returns 0 if the two strings are identical.
 * @returns > 0 if the first string is greater than the second string.
 * @param   psz1        First UTF-8 string.  NULL is allowed.
 * @param   psz2        Second UTF-8 string.  NULL is allowed.
 */
RTDECL(int) RTStrICmp(const char *psz1, const char *psz2)
{
    /* Identical pointers (including two NULLs) are equal; a lone NULL sorts first. */
    if (psz1 == psz2)
        return 0;
    if (psz1 == NULL)
        return -1;
    if (psz2 == NULL)
        return 1;

    const char * const pszBegin1 = psz1;
    for (;;)
    {
        /* Decode one code point from each string. */
        RTUNICP ucLeft;
        int rc = RTStrGetCpEx(&psz1, &ucLeft);
        if (RT_FAILURE(rc))
        {
            AssertRC(rc);
            --psz1; /* presumably undoes the advance done by the failed decode */
            break;
        }

        RTUNICP ucRight;
        rc = RTStrGetCpEx(&psz2, &ucRight);
        if (RT_FAILURE(rc))
        {
            AssertRC(rc);
            --psz2;
            /* The first string decoded fine, so step it back one whole code point. */
            psz1 = RTStrPrevCp(pszBegin1, psz1);
            break;
        }

        /* A mismatch only counts if it survives both foldings; lower case diff last! */
        if (   ucLeft != ucRight
            && RTUniCpToUpper(ucLeft) != RTUniCpToUpper(ucRight))
        {
            int const iDiff = RTUniCpToLower(ucLeft) - RTUniCpToLower(ucRight);
            if (iDiff != 0)
                return iDiff;
        }

        /* Reached the terminator of the first string without a difference. */
        if (ucLeft == 0)
            return 0;
    }

    /* Hit some bad encoding, continue in case sensitive mode. */
    return RTStrCmp(psz1, psz2);
}
/**
 * Checks that a manifest entry name is acceptable.
 *
 * A name is rejected with VERR_INVALID_NAME if it is empty or contains a
 * code point below 32, a colon or a parenthesis.  A backslash is accepted
 * but flags the name as needing normalization.
 *
 * @returns IPRT status code; a RTStrGetCpEx failure is passed up unchanged.
 * @param   pszEntry                The entry name to validate.
 * @param   pfNeedNormalization     Where to return whether the name contains
 *                                  a backslash.  Optional.  Not written when
 *                                  an invalid character or encoding is hit.
 * @param   pcchEntry               Where to return the name length in bytes
 *                                  (terminator excluded).  Optional.  Not
 *                                  written when an invalid character or
 *                                  encoding is hit.
 */
static int rtManifestValidateNameEntry(const char *pszEntry, bool *pfNeedNormalization, size_t *pcchEntry)
{
    bool        fHasBackslash = false;
    const char *pszNext       = pszEntry;
    int         rc;

    for (;;)
    {
        RTUNICP uc;
        rc = RTStrGetCpEx(&pszNext, &uc);
        if (RT_FAILURE(rc))
            return rc;
        if (uc == 0)
            break;

        switch (uc)
        {
            case '\\':
                fHasBackslash = true;
                break;

            case ':':
            case '(':
            case ')':
                return VERR_INVALID_NAME;

            default:
                if (uc < 32)
                    return VERR_INVALID_NAME;
                break;
        }
    }

    /* pszNext has been advanced past the terminator, hence the minus one. */
    size_t const cchName = pszNext - pszEntry - 1;

    if (pfNeedNormalization)
        *pfNeedNormalization = fHasBackslash;
    if (pcchEntry)
        *pcchEntry = cchName;

    return cchName != 0 ? rc : VERR_INVALID_NAME;
}
/**
 * Compares a UTF-16 string with a UTF-8 string, code point by code point.
 *
 * NULL and the empty string are treated as equal.
 *
 * @returns -1 if the first string sorts before the second, 0 if they are
 *          equal and 1 if it sorts after.  A decoding failure asserts and
 *          returns 1 (UTF-16 side) or -1 (UTF-8 side).
 * @param   pwsz1       The UTF-16 string.  NULL is allowed.
 * @param   psz2        The UTF-8 string.  NULL is allowed.
 */
RTDECL(int) RTUtf16CmpUtf8(PCRTUTF16 pwsz1, const char *psz2)
{
    /*
     * NULL and empty strings are all the same.
     */
    if (pwsz1 == NULL)
        return (psz2 == NULL || *psz2 == '\0') ? 0 : -1;
    if (psz2 == NULL)
        return *pwsz1 == 0 ? 0 : 1;

    /*
     * Decode both strings in lock step until they differ or both end.
     */
    RTUNICP ucLeft;
    RTUNICP ucRight;
    do
    {
        int rc = RTUtf16GetCpEx(&pwsz1, &ucLeft);
        AssertRCReturn(rc, 1);

        rc = RTStrGetCpEx(&psz2, &ucRight);
        AssertRCReturn(rc, -1);

        if (ucLeft != ucRight)
            return ucLeft < ucRight ? -1 : 1;
    } while (ucLeft != 0);

    return 0;
}
/**
 * Upper cases a UTF-8 string in place.
 *
 * Each code point is decoded, folded with RTUniCpToUpper and written back
 * over the same buffer.  This ASSUMES that the folded code point never needs
 * more bytes than the original one (checked by an assertion).
 *
 * @returns @a psz.
 * @param   psz         The string to convert; modified in place.
 */
RTDECL(char *) RTStrToUpper(char *psz)
{
    const char *pszRead  = psz;
    char       *pszWrite = psz;
    RTUNICP     uc;
    do
    {
        int const rc = RTStrGetCpEx(&pszRead, &uc);
        if (RT_FAILURE(rc))
        {
            /* bad encoding, just copy it quietly (uc == RTUNICP_INVALID (!= 0)). */
            AssertRC(rc);
            *pszWrite++ = pszRead[-1];
        }
        else
        {
            uc = RTUniCpToUpper(uc);
            pszWrite = RTStrPutCp(pszWrite, uc);
        }
        /* The writer must never overtake the reader. */
        Assert((uintptr_t)pszWrite <= (uintptr_t)pszRead);
    } while (uc != 0);

    return psz;
}
/**
 * Returns a sub-string measured in code points rather than bytes.
 *
 * @returns A new string holding up to @a n code points starting at code
 *          point index @a pos.  The result is empty if @a n is zero, if
 *          @a pos lies at or beyond the end of the string, or if the string
 *          contains an invalid UTF-8 encoding in the part that is walked.
 * @param   pos     Index of the first code point to copy.
 * @param   n       Maximum number of code points to copy; npos means
 *                  everything from @a pos to the end of the string.
 */
RTCString RTCString::substrCP(size_t pos /*= 0*/, size_t n /*= npos*/) const
{
    RTCString ret;

    if (n)
    {
        const char *psz;

        if ((psz = c_str()))
        {
            RTUNICP cp;

            // walk the UTF-8 characters until where the caller wants to start
            size_t i = pos;
            while (*psz && i--)
                if (RT_FAILURE(RTStrGetCpEx(&psz, &cp)))
                    return ret;     // return empty string on bad encoding

            const char *pFirst = psz;

            if (n == npos)
                // all the rest:
                ret = pFirst;
            else
            {
                // walk up to n further code points to find the end of the copy
                i = n;
                while (*psz && i--)
                    if (RT_FAILURE(RTStrGetCpEx(&psz, &cp)))
                        return ret;     // return empty string on bad encoding

                size_t cbCopy = psz - pFirst;
                if (cbCopy)
                {
                    ret.reserve(cbCopy + 1); // may throw bad_alloc
#ifndef RT_EXCEPTIONS_ENABLED
                    // Without exceptions a failed reserve() is silent, so verify
                    // the buffer of the string we are about to write to (ret,
                    // not *this) before touching ret.m_psz.
                    AssertRelease(ret.capacity() >= cbCopy + 1);
#endif
                    memcpy(ret.m_psz, pFirst, cbCopy);
                    ret.m_cch = cbCopy;
                    ret.m_psz[cbCopy] = '\0';
                }
            }
        }
    }

    return ret;
}
/**
 * Checks whether a unicode code point occurs in the separator string.
 *
 * The string terminator itself takes part in the comparison, so a zero
 * code point is reported as found.
 *
 * @returns true if it's a separator, false if it isn't (or if the separator
 *          string has an invalid encoding, which also asserts).
 * @param   Cp              The code point.
 * @param   pszSeparators   The separators.
 */
static bool rtGetOptIsUniCpInString(RTUNICP Cp, const char *pszSeparators)
{
    /* This could be done in a more optimal fashion.  Probably worth a
       separate RTStr function at some point. */
    RTUNICP CpCur;
    do
    {
        int rc = RTStrGetCpEx(&pszSeparators, &CpCur);
        AssertRCReturn(rc, false);
        if (CpCur == Cp)
            return true;
    } while (CpCur != 0);

    return false;
}
/**
 * Matches a name against the directory filter when the filter holds no
 * wildcards.
 *
 * Each name code point must equal the corresponding filter code point either
 * as is or after upper casing, and both must end at the same position.
 *
 * @returns true if the name matches the filter.
 * @returns false if the name doesn't match filter (or has a bad encoding).
 * @param   pDir        The directory handle; supplies puszFilter.
 * @param   pszName     The path to match to the filter.
 */
static DECLCALLBACK(bool) rtDirFilterWinNtMatchNoWildcards(PRTDIR pDir, const char *pszName)
{
    PCRTUNICP pucExpected = pDir->puszFilter;
    for (;;)
    {
        RTUNICP ucName;
        int rc = RTStrGetCpEx(&pszName, &ucName);
        AssertRCReturn(rc, false);

        RTUNICP const ucExpected = *pucExpected++;
        if (   ucName != ucExpected
            && RTUniCpToUpper(ucName) != ucExpected)
            return false;

        /* The terminators matched as well, so we are done. */
        if (!ucName)
            return true;
    }
}
/**
 * Advances a string pointer past any leading delimiters.
 *
 * @returns VINF_SUCCESS or RTStrGetCpEx status code.
 * @param   ppszSrc         Where to get and return the string pointer.  On
 *                          success it points at the first non-delimiter (or
 *                          the terminator); on failure at the code point that
 *                          could not be decoded.
 * @param   pszSeparators   The separators.
 * @param   cchSeparators   The length of @a pszSeparators.
 */
static int rtGetOptSkipDelimiters(const char **ppszSrc, const char *pszSeparators, size_t cchSeparators)
{
    const char *pszCur = *ppszSrc;
    int         rc;
    for (;;)
    {
        /* Decode through a scratch pointer so pszCur only moves past delimiters. */
        const char *pszNext = pszCur;
        RTUNICP     Cp;
        rc = RTStrGetCpEx(&pszNext, &Cp);
        if (RT_FAILURE(rc))
            break;
        if (   Cp == 0
            || !rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
        {
            rc = VINF_SUCCESS;
            break;
        }
        pszCur = pszNext;
    }

    *ppszSrc = pszCur;
    return rc;
}
/**
 * Matches a name against a filter expression, code point by code point.
 *
 * The rules are as follows:
 *      '?'     Matches exactly one char.
 *      '*'     Matches zero or more chars.
 *      '<'     The dos star, matches zero or more chars except the DOS dot.
 *      '>'     The dos question mark, matches one char, but dots and end-of-name eats them.
 *      '"'     The dos dot, matches a dot or end-of-name.
 *
 * Any other filter code point is compared with the upper cased name code
 * point, so the filter is presumably expected to be upper cased already.
 *
 * @returns true if the name matches the filter.
 * @returns false if the name doesn't match filter, if the name cannot be
 *          decoded, or if the recursion depth reaches 256.
 * @param   iDepth          The recursion depth.
 * @param   pszName         The path to match to the filter.
 * @param   puszFilter      The filter string.
 */
static bool rtDirFilterWinNtMatchBase(unsigned iDepth, const char *pszName, PCRTUNICP puszFilter)
{
    /* Cap the recursion done via the star helpers. */
    AssertReturn(iDepth++ < 256, false);

    /*
     * Walk the string and match it up char by char.
     */
    RTUNICP uc;
    do
    {
        /* Fetch the next filter char and the next name char in lock step. */
        RTUNICP ucFilter = *puszFilter++;
        int rc = RTStrGetCpEx(&pszName, &uc);
        AssertRCReturn(rc, false);
        switch (ucFilter)
        {
            /* Exactly one char. */
            case '?':
                if (!uc)
                    return false;
                break;

            /* One char, but the dos dot and end-of-name eats '>' and '<'. */
            case '>': /* dos ? */
                if (!uc)
                    return rtDirFilterWinNtMatchEon(puszFilter);
                if (uc == '.')
                {
                    /* Drop any further dos question marks / dos stars in the filter. */
                    while ((ucFilter = *puszFilter) == '>' || ucFilter == '<')
                        puszFilter++;
                    if (ucFilter == '"' || ucFilter == '.')  /* not 100% sure about the last dot */
                        ++puszFilter;
                    else /* the dos question mark doesn't match '.'s, so backtrack. */
                        pszName = RTStrPrevCp(NULL, pszName);
                }
                break;

            /* Match a dot or the end-of-name. */
            case '"': /* dos '.' */
                if (uc != '.')
                {
                    if (uc)
                        return false;
                    return rtDirFilterWinNtMatchEon(puszFilter);
                }
                break;

            /* zero or more */
            case '*':
                return rtDirFilterWinNtMatchStar(iDepth, uc, pszName, puszFilter);
            case '<': /* dos '*' */
                return rtDirFilterWinNtMatchDosStar(iDepth, uc, pszName, puszFilter);

            /* uppercased match */
            default:
            {
                if (RTUniCpToUpper(uc) != ucFilter)
                    return false;
                break;
            }
        }
    } while (uc); /* the terminators are compared too (default case) before leaving */

    return true;
}
/**
 * Recursive star matching.
 *
 * Called after a '*' has been consumed from the filter; tries to match the
 * rest of the filter against the rest of the name.
 *
 * @returns true on match.
 * @returns false on miss (also when the name cannot be decoded or the
 *          recursion depth reaches 256).
 * @param   iDepth          The recursion depth.
 * @param   uc              The name code point most recently decoded by the caller.
 * @param   pszNext         The name position following @a uc.
 * @param   puszFilter      The filter position following the star.
 */
static bool rtDirFilterWinNtMatchStar(unsigned iDepth, RTUNICP uc, const char *pszNext, PCRTUNICP puszFilter)
{
    AssertReturn(iDepth++ < 256, false);

    /*
     * Inspect the next filter char(s) until we find something to work on.
     */
    for (;;)
    {
        RTUNICP ucFilter = *puszFilter++;
        switch (ucFilter)
        {
            /*
             * The star expression is the last in the pattern.
             * Cool, that means we're done!
             */
            case '\0':
                return true;

            /*
             * Just in case (doubt we ever get here), just merge it with the current one.
             */
            case '*':
                break;

            /*
             * Skip a fixed number of chars.
             * Figure out how many by walking the filter ignoring '*'s.
             */
            case '?':
            {
                unsigned cQms = 1;
                while ((ucFilter = *puszFilter) == '*' || ucFilter == '?')
                {
                    cQms += ucFilter == '?';
                    puszFilter++;
                }
                /* Consume one name char per question mark; running out of name is a miss. */
                do
                {
                    if (!uc)
                        return false;
                    int rc = RTStrGetCpEx(&pszNext, &uc);
                    AssertRCReturn(rc, false);
                } while (--cQms > 0);
                /* done? */
                if (!ucFilter)
                    return true;
                break;
            }

            /*
             * The simple way is to try char by char and match the remaining
             * expression. If it's trailing we're done.
             */
            case '>': /* dos question mark */
            {
                if (rtDirFilterWinNtMatchEon(puszFilter))
                    return true;
                const char *pszStart = pszNext;
                do
                {
                    if (rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter))
                        return true;
                    int rc = RTStrGetCpEx(&pszNext, &uc);
                    AssertRCReturn(rc, false);
                } while (uc);

                /* backtrack and do the current char. */
                pszNext = RTStrPrevCp(NULL, pszStart);
                AssertReturn(pszNext, false);
                return rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter);
            }

            /*
             * This bugger is interesting.
             * Time for brute force. Iterate the name char by char.
             */
            case '<':
            {
                do
                {
                    if (rtDirFilterWinNtMatchDosStar(iDepth, uc, pszNext, puszFilter))
                        return true;
                    int rc = RTStrGetCpEx(&pszNext, &uc);
                    AssertRCReturn(rc, false);
                } while (uc);
                return false;
            }

            /*
             * This guy matches a '.' or the end of the name.
             * It's very simple if the rest of the filter expression also matches eon.
             */
            case '"':
                if (rtDirFilterWinNtMatchEon(puszFilter))
                    return true;
                /* Otherwise treat it as a literal dot and share the default handling. */
                ucFilter = '.';
                /* fall thru */

            /*
             * Ok, we've got zero or more characters.
             * We'll try match starting at each occurrence of this character.
             */
            default:
            {
                do
                {
                    if (    RTUniCpToUpper(uc) == ucFilter
                        &&  rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter))
                        return true;
                    int rc = RTStrGetCpEx(&pszNext, &uc);
                    AssertRCReturn(rc, false);
                } while (uc);
                return false;
            }
        }
    } /* for (;;) */

    /* won't ever get here! */
}
/**
 * Recursive dos star matching.
 *
 * Practically the same as normal star, except that the dos star stops
 * when hitting the last dot.
 *
 * @returns true on match.
 * @returns false on miss (also when the name cannot be decoded or the
 *          recursion depth reaches 256).
 * @param   iDepth          The recursion depth.
 * @param   uc              The name code point most recently decoded by the caller.
 * @param   pszNext         The name position following @a uc.
 * @param   puszFilter      The filter position following the dos star.
 */
static bool rtDirFilterWinNtMatchDosStar(unsigned iDepth, RTUNICP uc, const char *pszNext, PCRTUNICP puszFilter)
{
    AssertReturn(iDepth++ < 256, false);

    /*
     * If there is no dos star, we should work just like the NT star.
     * Since that's generally faster algorithms, we jump down to there if we can.
     */
    const char *pszDosDot = strrchr(pszNext, '.');
    if (!pszDosDot && uc == '.')
        pszDosDot = pszNext - 1; /* no later dot: the current char is taken to be the byte just before pszNext */
    if (!pszDosDot)
        return rtDirFilterWinNtMatchStar(iDepth, uc, pszNext, puszFilter);

    /*
     * Inspect the next filter char(s) until we find something to work on.
     */
    RTUNICP ucFilter = *puszFilter++;
    switch (ucFilter)
    {
        /*
         * The star expression is the last in the pattern.
         * We're fine if the name ends with a dot.
         */
        case '\0':
            return !pszDosDot[1];

        /*
         * Simplified by brute force.
         */
        case '>': /* dos question mark */
        case '?':
        case '*':
        case '<': /* dos star */
        case '"': /* dos dot */
        {
            /* Hand the special char itself back to the base matcher. */
            puszFilter--;
            const char *pszStart = pszNext;
            /* Try every start position up to one byte past the last dot. */
            do
            {
                if (rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter))
                    return true;
                int rc = RTStrGetCpEx(&pszNext, &uc);
                AssertRCReturn(rc, false);
            } while ((intptr_t)pszDosDot - (intptr_t)pszNext >= -1);

            /* backtrack and do the current char. */
            pszNext = RTStrPrevCp(NULL, pszStart);
            AssertReturn(pszNext, false);
            return rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter);
        }

        /*
         * Ok, we've got zero or more characters.
         * We'll try match starting at each occurrence of this character.
         */
        default:
        {
            /* First the char the caller already decoded, then the following ones. */
            if (    RTUniCpToUpper(uc) == ucFilter
                &&  rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter))
                return true;
            do
            {
                int rc = RTStrGetCpEx(&pszNext, &uc);
                AssertRCReturn(rc, false);
                if (    RTUniCpToUpper(uc) == ucFilter
                    &&  rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter))
                    return true;
            } while ((intptr_t)pszDosDot - (intptr_t)pszNext >= -1);
            return false;
        }
    }

    /* won't ever get here! */
}
static void test2(RTTEST hTest) { RTTestSub(hTest, "UTF-8 upper/lower encoding assumption"); #define CHECK_EQUAL(str1, str2) \ do \ { \ RTTESTI_CHECK(strlen((str1).c_str()) == (str1).length()); \ RTTESTI_CHECK((str1).length() == (str2).length()); \ RTTESTI_CHECK(mymemcmp((str1).c_str(), (str2).c_str(), (str2).length() + 1) == 0); \ } while (0) RTCString strTmp, strExpect; char szDst[16]; /* Some simple ascii stuff. */ strTmp = "abcdefghijklmnopqrstuvwxyz0123456ABCDEFGHIJKLMNOPQRSTUVWXYZ;-+/\\"; strExpect = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456ABCDEFGHIJKLMNOPQRSTUVWXYZ;-+/\\"; strTmp.toUpper(); CHECK_EQUAL(strTmp, strExpect); strTmp.toLower(); strExpect = "abcdefghijklmnopqrstuvwxyz0123456abcdefghijklmnopqrstuvwxyz;-+/\\"; CHECK_EQUAL(strTmp, strExpect); strTmp = "abcdefghijklmnopqrstuvwxyz0123456ABCDEFGHIJKLMNOPQRSTUVWXYZ;-+/\\"; strTmp.toLower(); CHECK_EQUAL(strTmp, strExpect); /* Collect all upper and lower case code points. */ RTCString strLower(""); strLower.reserve(_4M); RTCString strUpper(""); strUpper.reserve(_4M); for (RTUNICP uc = 1; uc <= 0x10fffd; uc++) { /* Unicode 4.01, I think, introduced a few codepoints with lower/upper mappings that aren't up for roundtrips and which case folding has a different UTF-8 length. We'll just skip them here as there are very few: - Dotless small i and dotless capital I folds into ASCII I and i. - The small letter long s folds to ASCII S. - Greek prosgegrammeni folds to iota, which is a letter with both upper and lower case foldings of its own. 
*/ if (uc == 0x131 || uc == 0x130 || uc == 0x17f || 0x1fbe) continue; if (RTUniCpIsLower(uc)) { RTTESTI_CHECK_MSG(uc < 0xd800 || (uc > 0xdfff && uc != 0xfffe && uc != 0xffff), ("%#x\n", uc)); strLower.appendCodePoint(uc); } if (RTUniCpIsUpper(uc)) { RTTESTI_CHECK_MSG(uc < 0xd800 || (uc > 0xdfff && uc != 0xfffe && uc != 0xffff), ("%#x\n", uc)); strUpper.appendCodePoint(uc); } } RTTESTI_CHECK(strlen(strLower.c_str()) == strLower.length()); RTTESTI_CHECK(strlen(strUpper.c_str()) == strUpper.length()); /* Fold each code point in the lower case string and check that it encodes into the same or less number of bytes. */ size_t cch = 0; const char *pszCur = strLower.c_str(); RTCString strUpper2(""); strUpper2.reserve(strLower.length() + 64); for (;;) { RTUNICP ucLower; const char * const pszPrev = pszCur; RTTESTI_CHECK_RC_BREAK(RTStrGetCpEx(&pszCur, &ucLower), VINF_SUCCESS); size_t const cchSrc = pszCur - pszPrev; if (!ucLower) break; RTUNICP const ucUpper = RTUniCpToUpper(ucLower); const char *pszDstEnd = RTStrPutCp(szDst, ucUpper); size_t const cchDst = pszDstEnd - &szDst[0]; RTTESTI_CHECK_MSG(cchSrc >= cchDst, ("ucLower=%#x %u bytes; ucUpper=%#x %u bytes\n", ucLower, cchSrc, ucUpper, cchDst)); cch += cchDst; strUpper2.appendCodePoint(ucUpper); /* roundtrip stability */ RTUNICP const ucUpper2 = RTUniCpToUpper(ucUpper); RTTESTI_CHECK_MSG(ucUpper2 == ucUpper, ("ucUpper2=%#x ucUpper=%#x\n", ucUpper2, ucUpper)); RTUNICP const ucLower2 = RTUniCpToLower(ucUpper); RTTESTI_CHECK_MSG(ucLower2 == ucLower, ("ucLower2=%#x ucLower=%#x\n", ucLower2, ucLower)); RTUNICP const ucUpper3 = RTUniCpToUpper(ucLower2); RTTESTI_CHECK_MSG(ucUpper3 == ucUpper, ("ucUpper3=%#x ucUpper=%#x\n", ucUpper3, ucUpper)); pszDstEnd = RTStrPutCp(szDst, ucLower2); size_t const cchLower2 = pszDstEnd - &szDst[0]; RTTESTI_CHECK_MSG(cchDst == cchLower2, ("ucLower2=%#x %u bytes; ucUpper=%#x %u bytes; ucLower=%#x\n", ucLower2, cchLower2, ucUpper, cchDst, ucLower)); } RTTESTI_CHECK(strlen(strUpper2.c_str()) == 
strUpper2.length()); RTTESTI_CHECK_MSG(cch == strUpper2.length(), ("cch=%u length()=%u\n", cch, strUpper2.length())); /* the toUpper method shall do the same thing. */ strTmp = strLower; CHECK_EQUAL(strTmp, strLower); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper2); /* Ditto for the upper case string. */ cch = 0; pszCur = strUpper.c_str(); RTCString strLower2(""); strLower2.reserve(strUpper.length() + 64); for (;;) { RTUNICP ucUpper; const char * const pszPrev = pszCur; RTTESTI_CHECK_RC_BREAK(RTStrGetCpEx(&pszCur, &ucUpper), VINF_SUCCESS); size_t const cchSrc = pszCur - pszPrev; if (!ucUpper) break; RTUNICP const ucLower = RTUniCpToLower(ucUpper); const char *pszDstEnd = RTStrPutCp(szDst, ucLower); size_t const cchDst = pszDstEnd - &szDst[0]; RTTESTI_CHECK_MSG(cchSrc >= cchDst, ("ucUpper=%#x %u bytes; ucLower=%#x %u bytes\n", ucUpper, cchSrc, ucLower, cchDst)); cch += cchDst; strLower2.appendCodePoint(ucLower); /* roundtrip stability */ RTUNICP const ucLower2 = RTUniCpToLower(ucLower); RTTESTI_CHECK_MSG(ucLower2 == ucLower, ("ucLower2=%#x ucLower=%#x\n", ucLower2, ucLower)); RTUNICP const ucUpper2 = RTUniCpToUpper(ucLower); RTTESTI_CHECK_MSG(ucUpper2 == ucUpper, ("ucUpper2=%#x ucUpper=%#x\n", ucUpper2, ucUpper)); RTUNICP const ucLower3 = RTUniCpToLower(ucUpper2); RTTESTI_CHECK_MSG(ucLower3 == ucLower, ("ucLower3=%#x ucLower=%#x\n", ucLower3, ucLower)); pszDstEnd = RTStrPutCp(szDst, ucUpper2); size_t const cchUpper2 = pszDstEnd - &szDst[0]; RTTESTI_CHECK_MSG(cchDst == cchUpper2, ("ucUpper2=%#x %u bytes; ucLower=%#x %u bytes\n", ucUpper2, cchUpper2, ucLower, cchDst)); } RTTESTI_CHECK(strlen(strLower2.c_str()) == strLower2.length()); RTTESTI_CHECK_MSG(cch == strLower2.length(), ("cch=%u length()=%u\n", cch, strLower2.length())); strTmp = strUpper; CHECK_EQUAL(strTmp, strUpper); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower2); /* Checks of folding stability when nothing shall change. 
*/ strTmp = strUpper; CHECK_EQUAL(strTmp, strUpper); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper); strTmp = strUpper2; CHECK_EQUAL(strTmp, strUpper2); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper2); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper2); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper2); strTmp = strLower; CHECK_EQUAL(strTmp, strLower); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower); strTmp = strLower2; CHECK_EQUAL(strTmp, strLower2); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower2); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower2); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower2); /* Check folding stability for roundtrips. */ strTmp = strUpper; CHECK_EQUAL(strTmp, strUpper); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower2); strTmp.toUpper(); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower2); strTmp.toUpper(); strTmp.toLower(); CHECK_EQUAL(strTmp, strLower2); strTmp = strLower; CHECK_EQUAL(strTmp, strLower); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper2); strTmp.toLower(); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper2); strTmp.toLower(); strTmp.toUpper(); CHECK_EQUAL(strTmp, strUpper2); }
/**
 * Case insensitive search for a sub-string in a UTF-8 string.
 *
 * The first code point of the needle is matched against the haystack code
 * point by code point, accepting the code point itself as well as its upper
 * and lower case forms.  At each candidate position RTStrNICmp compares the
 * remainder of the needle.
 *
 * @returns Pointer to the first byte of the match in @a pszHaystack, or NULL
 *          if there is no match or either argument is NULL.  An empty needle
 *          matches at the start of the haystack.
 * @param   pszHaystack     The string to search.  NULL is allowed.
 * @param   pszNeedle       The sub-string to search for.  NULL is allowed.
 */
RTDECL(char *) RTStrIStr(const char *pszHaystack, const char *pszNeedle)
{
    /* Any NULL strings means NULL return. (In the RTStrCmp tradition.) */
    if (!pszHaystack)
        return NULL;
    if (!pszNeedle)
        return NULL;

    /* The empty string matches everything. */
    if (!*pszNeedle)
        return (char *)pszHaystack;

    /*
     * Pick out the first code point of the needle and its two foldings.
     * The decode status is deliberately not checked, as before.
     */
    RTUNICP Cp0;
    RTStrGetCpEx(&pszNeedle, &Cp0);     /* pszNeedle is advanced one code point. */
    size_t const    cchNeedle   = strlen(pszNeedle);
    RTUNICP const   Cp0Lower    = RTUniCpToLower(Cp0);
    RTUNICP const   Cp0Upper    = RTUniCpToUpper(Cp0);

    for (;;)
    {
        /*
         * Remember where this haystack code point starts.  The match must be
         * reported from here rather than by subtracting the byte length of
         * the needle's first code point, since the two case variants need not
         * have the same UTF-8 length.
         */
        const char * const pszMatch = pszHaystack;
        RTUNICP Cp;
        RTStrGetCpEx(&pszHaystack, &Cp);
        if (!Cp)
            break;

        /*
         * Always test all three forms.  When Cp0 has no case the three are
         * identical, and when Cp0 differs from both of its foldings Cp0
         * itself still has to be accepted.
         */
        if (   (   Cp == Cp0
                || Cp == Cp0Upper
                || Cp == Cp0Lower)
            && !RTStrNICmp(pszHaystack, pszNeedle, cchNeedle))
            return (char *)pszMatch;
    }

    return NULL;
}
/**
 * Splits a command line string into an argument vector.
 *
 * All argument strings are written into a single heap buffer the size of the
 * command line; the returned array entries point into that buffer, with the
 * first entry pointing at its start.  Quoting and escaping follow either
 * Bourne shell or Microsoft CRT rules, selected by @a fFlags.
 *
 * @returns VINF_SUCCESS on success.
 * @returns VERR_INVALID_FLAGS if @a fFlags is neither of the two supported
 *          quoting styles.
 * @returns VERR_INVALID_PARAMETER if @a pszSeparators is empty.
 * @returns VERR_NO_STR_MEMORY or VERR_NO_MEMORY on allocation failure.
 * @returns The RTStrGetCpEx status if the command line cannot be decoded.
 * @param   ppapszArgv      Where to return the NULL terminated argument array.
 *                          Only set on success.
 * @param   pcArgs          Where to return the argument count.  Only set on
 *                          success.
 * @param   pszCmdLine      The command line to split.
 * @param   fFlags          RTGETOPTARGV_CNV_QUOTE_BOURNE_SH or
 *                          RTGETOPTARGV_CNV_QUOTE_MS_CRT.
 * @param   pszSeparators   The argument separators; NULL selects space, tab,
 *                          newline and carriage return.
 */
RTDECL(int) RTGetOptArgvFromString(char ***ppapszArgv, int *pcArgs, const char *pszCmdLine,
                                   uint32_t fFlags, const char *pszSeparators)
{
    /*
     * Some input validation.
     */
    AssertPtr(pszCmdLine);
    AssertPtr(pcArgs);
    AssertPtr(ppapszArgv);
    AssertReturn(   fFlags == RTGETOPTARGV_CNV_QUOTE_BOURNE_SH
                 || fFlags == RTGETOPTARGV_CNV_QUOTE_MS_CRT, VERR_INVALID_FLAGS);
    if (!pszSeparators)
        pszSeparators = " \t\n\r";
    else
        AssertPtr(pszSeparators);
    size_t const cchSeparators = strlen(pszSeparators);
    AssertReturn(cchSeparators > 0, VERR_INVALID_PARAMETER);

    /*
     * Parse the command line and chop it into individual argv strings.
     */
    int         rc = VINF_SUCCESS;
    const char *pszSrc = pszCmdLine;
    char       *pszDup = (char *)RTMemAlloc(strlen(pszSrc) + 1);
    char       *pszDst = pszDup;
    if (!pszDup)
        return VERR_NO_STR_MEMORY;
    char      **papszArgs = NULL;
    unsigned    iArg = 0;
    while (*pszSrc)
    {
        /* Skip stuff */
        rc = rtGetOptSkipDelimiters(&pszSrc, pszSeparators, cchSeparators);
        if (RT_FAILURE(rc))
            break;
        if (!*pszSrc)
            break;

        /* Start a new entry.  The array grows 32 entries at a time, always
           keeping one spare slot for the terminating NULL entry. */
        if ((iArg % 32) == 0)
        {
            void *pvNew = RTMemRealloc(papszArgs, (iArg + 33) * sizeof(char *));
            if (!pvNew)
            {
                rc = VERR_NO_MEMORY;
                break;
            }
            papszArgs = (char **)pvNew;
        }
        papszArgs[iArg++] = pszDst;

        /*
         * Parse and copy the string over.
         */
        RTUNICP Cp;
        if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_BOURNE_SH)
        {
            /*
             * Bourne shell style.
             */
            RTUNICP CpQuote = 0; /* the quote char currently open, 0 if none */
            for (;;)
            {
                rc = RTStrGetCpEx(&pszSrc, &Cp);
                if (RT_FAILURE(rc) || !Cp)
                    break;
                if (!CpQuote)
                {
                    if (Cp == '"' || Cp == '\'')
                        CpQuote = Cp;
                    else if (rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
                        break;
                    else if (Cp != '\\')
                        pszDst = RTStrPutCp(pszDst, Cp);
                    else
                    {
                        /* escaped char */
                        rc = RTStrGetCpEx(&pszSrc, &Cp);
                        if (RT_FAILURE(rc) || !Cp)
                            break;
                        pszDst = RTStrPutCp(pszDst, Cp);
                    }
                }
                else if (CpQuote != Cp)
                {
                    /* Inside quotes a backslash only escapes within double quotes. */
                    if (Cp != '\\' || CpQuote == '\'')
                        pszDst = RTStrPutCp(pszDst, Cp);
                    else
                    {
                        /* escaped char */
                        rc = RTStrGetCpEx(&pszSrc, &Cp);
                        if (RT_FAILURE(rc) || !Cp)
                            break;
                        pszDst = RTStrPutCp(pszDst, Cp);
                    }
                }
                else
                    CpQuote = 0;
            }
        }
        else
        {
            /*
             * Microsoft CRT style.
             */
            Assert((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_MS_CRT);
            bool fInQuote = false;
            for (;;)
            {
                rc = RTStrGetCpEx(&pszSrc, &Cp);
                if (RT_FAILURE(rc) || !Cp)
                    break;
                if (Cp == '"')
                    fInQuote = !fInQuote;
                else if (!fInQuote && rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
                    break;
                else if (Cp != '\\')
                    pszDst = RTStrPutCp(pszDst, Cp);
                else
                {
                    /* A backslash sequence is only relevant if followed by
                       a double quote, then it will work like an escape char. */
                    size_t cQuotes = 1; /* counts the backslashes in the run, despite the name */
                    while (*pszSrc == '\\')
                    {
                        cQuotes++;
                        pszSrc++;
                    }
                    if (*pszSrc != '"')
                        /* Not an escape sequence. */
                        while (cQuotes-- > 0)
                            pszDst = RTStrPutCp(pszDst, '\\');
                    else
                    {
                        /* Escape sequence.  Output half of the slashes.  If odd
                           number, output the escaped double quote . */
                        while (cQuotes >= 2)
                        {
                            pszDst = RTStrPutCp(pszDst, '\\');
                            cQuotes -= 2;
                        }
                        if (!cQuotes)
                            fInQuote = !fInQuote;
                        else
                            pszDst = RTStrPutCp(pszDst, '"');
                        pszSrc++;
                    }
                }
            }
        }

        /* Terminate the argument; stop on error or at the end of the input. */
        *pszDst++ = '\0';
        if (RT_FAILURE(rc) || !Cp)
            break;
    }

    if (RT_FAILURE(rc))
    {
        RTMemFree(pszDup);
        RTMemFree(papszArgs);
        return rc;
    }

    /*
     * Terminate the array.
     * Check for empty string to make sure we've got an array.
     */
    if (iArg == 0)
    {
        /* No arguments: the string buffer is not needed, return a lone NULL entry. */
        RTMemFree(pszDup);
        papszArgs = (char **)RTMemAlloc(1 * sizeof(char *));
        if (!papszArgs)
            return VERR_NO_MEMORY;
    }
    papszArgs[iArg] = NULL;

    *pcArgs     = iArg;
    *ppapszArgv = papszArgs;
    return VINF_SUCCESS;
}
/**
 * Splits a command line string into an argument vector.
 *
 * Single and double quotes group characters (separators included) into one
 * argument and are themselves dropped; there is no escape character.  All
 * argument strings live in one heap buffer the size of the command line, and
 * the returned array entries point into it.
 *
 * @returns VINF_SUCCESS on success.
 * @returns VERR_INVALID_PARAMETER if @a pszSeparators is empty.
 * @returns VERR_NO_STR_MEMORY or VERR_NO_MEMORY on allocation failure.
 * @returns The RTStrGetCpEx status if the command line cannot be decoded.
 * @param   ppapszArgv      Where to return the NULL terminated argument array.
 *                          Only set on success.
 * @param   pcArgs          Where to return the argument count.  Only set on
 *                          success.
 * @param   pszCmdLine      The command line to split.
 * @param   pszSeparators   The argument separators; NULL selects space, tab,
 *                          newline and carriage return.
 */
RTDECL(int) RTGetOptArgvFromString(char ***ppapszArgv, int *pcArgs, const char *pszCmdLine, const char *pszSeparators)
{
    /*
     * Some input validation.
     */
    AssertPtr(pszCmdLine);
    AssertPtr(pcArgs);
    AssertPtr(ppapszArgv);
    if (pszSeparators)
        AssertPtr(pszSeparators);
    else
        pszSeparators = " \t\n\r";
    size_t const cchSeparators = strlen(pszSeparators);
    AssertReturn(cchSeparators > 0, VERR_INVALID_PARAMETER);

    /*
     * The output can never be longer than the input, so one buffer of the
     * same size holds all the argument strings.
     */
    char *pszBuf = (char *)RTMemAlloc(strlen(pszCmdLine) + 1);
    if (!pszBuf)
        return VERR_NO_STR_MEMORY;

    const char *pszIn     = pszCmdLine;
    char       *pszOut    = pszBuf;
    char      **papszArgs = NULL;
    unsigned    cArgs     = 0;
    int         rc        = VINF_SUCCESS;
    while (*pszIn != '\0')
    {
        /* Skip leading separators; nothing but separators left means done. */
        rc = rtGetOptSkipDelimiters(&pszIn, pszSeparators, cchSeparators);
        if (RT_FAILURE(rc) || *pszIn == '\0')
            break;

        /* Grow the array in chunks of 32, keeping a spare slot for the NULL entry. */
        if (cArgs % 32 == 0)
        {
            char **papszNew = (char **)RTMemRealloc(papszArgs, (cArgs + 33) * sizeof(char *));
            if (papszNew == NULL)
            {
                rc = VERR_NO_MEMORY;
                break;
            }
            papszArgs = papszNew;
        }
        papszArgs[cArgs] = pszOut;
        cArgs++;

        /* Copy code points until an unquoted separator or the end of the input. */
        RTUNICP CpOpenQuote = 0;
        RTUNICP Cp;
        while (   RT_SUCCESS(rc = RTStrGetCpEx(&pszIn, &Cp))
               && Cp != 0)
        {
            if (CpOpenQuote != 0)
            {
                if (Cp == CpOpenQuote)
                    CpOpenQuote = 0;
                else
                    pszOut = RTStrPutCp(pszOut, Cp);
            }
            else if (Cp == '"' || Cp == '\'')
                CpOpenQuote = Cp;
            else if (!rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
                pszOut = RTStrPutCp(pszOut, Cp);
            else
                break;
        }
        *pszOut++ = '\0';
        if (RT_FAILURE(rc) || Cp == 0)
            break;
    }

    if (RT_FAILURE(rc))
    {
        RTMemFree(pszBuf);
        RTMemFree(papszArgs);
        return rc;
    }

    /*
     * Terminate the array.  With no arguments at all the string buffer is
     * released and an array holding just the NULL entry is returned.
     */
    if (cArgs == 0)
    {
        RTMemFree(pszBuf);
        papszArgs = (char **)RTMemAlloc(1 * sizeof(char *));
        if (!papszArgs)
            return VERR_NO_MEMORY;
    }
    papszArgs[cArgs] = NULL;

    *pcArgs     = cArgs;
    *ppapszArgv = papszArgs;
    return VINF_SUCCESS;
}
/**
 * Internal write API, stream lock already held.
 *
 * A failure status is also stored in the stream's i32Error member, which
 * makes later calls fail right away with that status.
 *
 * @returns IPRT status code.
 * @param   pStream         The stream.
 * @param   pvBuf           What to write.
 * @param   cbWrite         How much to write.
 * @param   pcbWritten      Where to optionally return the number of bytes
 *                          written.
 * @param   fSureIsText     Set if we're sure this is UTF-8 text already.
 */
static int rtStrmWriteLocked(PRTSTREAM pStream, const void *pvBuf, size_t cbWrite, size_t *pcbWritten,
                             bool fSureIsText)
{
    /* A stream that has failed before stays failed. */
    int rc = pStream->i32Error;
    if (RT_FAILURE(rc))
        return rc;
    if (pStream->fRecheckMode)
        rtStreamRecheckMode(pStream);

#ifdef RT_OS_WINDOWS
    /*
     * Use the unicode console API when possible in order to avoid stuff
     * getting lost in unnecessary code page translations.
     */
    HANDLE hCon;
    if (rtStrmIsConsoleUnlocked(pStream, &hCon))
    {
        /* Flush the stdio buffer first, before writing to the console handle directly. */
# ifdef HAVE_FWRITE_UNLOCKED
        if (!fflush_unlocked(pStream->pFile))
# else
        if (!fflush(pStream->pFile))
# endif
        {
            /** @todo Consider buffering later. For now, we'd rather correct output than
             *        fast output. */
            DWORD    cwcWritten = 0;
            PRTUTF16 pwszSrc = NULL;
            size_t   cwcSrc = 0;
            rc = RTStrToUtf16Ex((const char *)pvBuf, cbWrite, &pwszSrc, 0, &cwcSrc);
            if (RT_SUCCESS(rc))
            {
                if (!WriteConsoleW(hCon, pwszSrc, (DWORD)cwcSrc, &cwcWritten, NULL))
                {
                    /* try write char-by-char to avoid heap problem. */
                    cwcWritten = 0;
                    while (cwcWritten != cwcSrc)
                    {
                        DWORD cwcThis;
                        if (!WriteConsoleW(hCon, &pwszSrc[cwcWritten], 1, &cwcThis, NULL))
                        {
                            /* Only fail if the caller cannot be told about a partial write.
                               NOTE(review): GetLastError() yields a Win32 error code, yet it
                               is fed to the errno converter here - confirm which converter
                               was intended. */
                            if (!pcbWritten || cwcWritten == 0)
                                rc = RTErrConvertFromErrno(GetLastError());
                            break;
                        }
                        if (cwcThis != 1) /* Unable to write current char (amount)? */
                            break;
                        cwcWritten++;
                    }
                }
                if (RT_SUCCESS(rc))
                {
                    if (cwcWritten == cwcSrc)
                    {
                        if (pcbWritten)
                            *pcbWritten = cbWrite;
                    }
                    else if (pcbWritten)
                    {
                        /* Partial write: walk both encodings in lock step to turn the
                           number of UTF-16 units written into a UTF-8 byte count. */
                        PCRTUTF16   pwszCur = pwszSrc;
                        const char *pszCur  = (const char *)pvBuf;
                        while ((uintptr_t)(pwszCur - pwszSrc) < cwcWritten)
                        {
                            RTUNICP CpIgnored;
                            RTUtf16GetCpEx(&pwszCur, &CpIgnored);
                            RTStrGetCpEx(&pszCur, &CpIgnored);
                        }
                        *pcbWritten = pszCur - (const char *)pvBuf;
                    }
                    else
                        rc = VERR_WRITE_ERROR;
                }
                RTUtf16Free(pwszSrc);
            }
        }
        else
            rc = RTErrConvertFromErrno(errno);
        if (RT_FAILURE(rc))
            ASMAtomicWriteS32(&pStream->i32Error, rc);
        return rc;
    }
#endif /* RT_OS_WINDOWS */

    /*
     * If we're sure it's text output, convert it from UTF-8 to the current
     * code page before printing it.
     *
     * Note! Partial writes are not supported in this scenario because we
     *       cannot easily report back a written length matching the input.
     */
    /** @todo Skip this if the current code set is UTF-8. */
    if (   pStream->fCurrentCodeSet
        && !pStream->fBinary
        && (   fSureIsText
            || rtStrmIsUtf8Text(pvBuf, cbWrite))
       )
    {
        /* The conversion needs a terminated string, so duplicate the input if
           the byte following it isn't a terminator.
           NOTE(review): this reads pszSrc[cbWrite], one byte past the stated
           length - confirm that callers guarantee it is readable. */
        char       *pszSrcFree = NULL;
        const char *pszSrc     = (const char *)pvBuf;
        if (pszSrc[cbWrite])
        {
            pszSrc = pszSrcFree = RTStrDupN(pszSrc, cbWrite);
            if (pszSrc == NULL)
                rc = VERR_NO_STR_MEMORY;
        }
        if (RT_SUCCESS(rc))
        {
            char *pszSrcCurCP;
            rc = RTStrUtf8ToCurrentCP(&pszSrcCurCP, pszSrc);
            if (RT_SUCCESS(rc))
            {
                size_t  cchSrcCurCP = strlen(pszSrcCurCP);
                /* Written as a single item, so the result is 1 (all) or 0 (not all). */
                IPRT_ALIGNMENT_CHECKS_DISABLE(); /* glibc / mempcpy again */
#ifdef HAVE_FWRITE_UNLOCKED
                ssize_t cbWritten = fwrite_unlocked(pszSrcCurCP, cchSrcCurCP, 1, pStream->pFile);
#else
                ssize_t cbWritten = fwrite(pszSrcCurCP, cchSrcCurCP, 1, pStream->pFile);
#endif
                IPRT_ALIGNMENT_CHECKS_ENABLE();
                if (cbWritten == 1)
                {
                    if (pcbWritten)
                        *pcbWritten = cbWrite;
                }
#ifdef HAVE_FWRITE_UNLOCKED
                else if (!ferror_unlocked(pStream->pFile))
#else
                else if (!ferror(pStream->pFile))
#endif
                {
                    /* Nothing written but no stream error either: report zero bytes. */
                    if (pcbWritten)
                        *pcbWritten = 0;
                }
                else
                    rc = VERR_WRITE_ERROR;
                RTStrFree(pszSrcCurCP);
            }
            RTStrFree(pszSrcFree);
        }

        if (RT_FAILURE(rc))
            ASMAtomicWriteS32(&pStream->i32Error, rc);
        return rc;
    }
/**
 * Reads an I/O stream to its end and checks that the content is valid UTF-8.
 *
 * @returns VINF_SUCCESS if the whole stream decoded fine.
 * @returns VERR_INVALID_UTF8_ENCODING if a code point is rejected because of
 *          @a fFlags (a zero code point with RTVFS_VALIDATE_UTF8_NO_NULL, or
 *          a code point above 0x10ffff with RTVFS_VALIDATE_UTF8_BY_RTC_3629).
 * @returns VERR_INVALID_POINTER if @a poffError is given but is not valid.
 * @returns VERR_INVALID_PARAMETER if @a fFlags has unknown bits set.
 * @returns The RTStrGetCpEx or RTVfsIoStrmRead failure status otherwise.
 * @param   hVfsIos     The I/O stream to validate.
 * @param   fFlags      RTVFS_VALIDATE_UTF8_XXX flags.
 * @param   poffError   Where to return the offset of the error on failure:
 *                      the position of the offending code point (or of the
 *                      first byte not validated yet when a read fails),
 *                      counted in bytes from where this call started
 *                      reading.  Set to zero on success.  Optional.
 */
RTDECL(int) RTVfsIoStrmValidateUtf8Encoding(RTVFSIOSTREAM hVfsIos, uint32_t fFlags, PRTFOFF poffError)
{
    /*
     * Validate input.
     */
    if (poffError)
    {
        AssertPtrReturn(poffError, VERR_INVALID_POINTER);
        *poffError = 0;
    }
    AssertReturn(!(fFlags & ~RTVFS_VALIDATE_UTF8_VALID_MASK), VERR_INVALID_PARAMETER);

    /*
     * The loop.
     *
     * The buffer has 8 spare bytes after the data area.  They hold the
     * terminator and are zero initialized, so that a decoder looking ahead
     * from a lead byte near the end of the data presumably never leaves the
     * array.
     */
    char         achBuf[1024 + 8] = { 0 };
    size_t const cbMaxData = sizeof(achBuf) - 8;
    size_t       cbUsed    = 0;     /* Number of not yet validated bytes in achBuf. */
    RTFOFF       offBuf    = 0;     /* Offset of achBuf[0] relative to where we started reading. */
    int          rc;
    for (;;)
    {
        /*
         * Fill the buffer.
         */
        size_t cbRead = 0;
        rc = RTVfsIoStrmRead(hVfsIos, &achBuf[cbUsed], cbMaxData - cbUsed, true /*fBlocking*/, &cbRead);
        if (RT_FAILURE(rc))
            break;

        /* A blocking read that yields nothing is treated like the end of the
           stream, which guarantees that the loop terminates. */
        bool const fEof = rc == VINF_EOF || cbRead == 0;
        cbUsed += cbRead;
        if (!cbUsed)
        {
            Assert(rc == VINF_EOF);
            break;
        }
        achBuf[cbUsed] = '\0'; /* terminate right after the valid data */

        /*
         * Process the data in the buffer, maybe leaving the final chars till
         * the next round (a sequence may be split between two reads).
         */
        const char  *pszCur = achBuf;
        size_t const offEnd = fEof
                            ? cbUsed
                            : cbUsed >= 7
                            ? cbUsed - 7
                            : 0;
        size_t       off    = 0;    /* Start of the code point about to be checked. */
        while (off < offEnd)
        {
            RTUNICP uc;
            rc = RTStrGetCpEx(&pszCur, &uc);
            if (RT_SUCCESS(rc))
            {
                if (!uc)
                {
                    if (fFlags & RTVFS_VALIDATE_UTF8_NO_NULL)
                        rc = VERR_INVALID_UTF8_ENCODING;
                }
                else if (uc > 0x10ffff)
                {
                    if (fFlags & RTVFS_VALIDATE_UTF8_BY_RTC_3629)
                        rc = VERR_INVALID_UTF8_ENCODING;
                }
            }
            if (RT_FAILURE(rc))
                break; /* off still points at the offending code point */
            off = (size_t)(pszCur - &achBuf[0]);
        }

        /* Account for what was validated; on failure this yields the error offset. */
        offBuf += (RTFOFF)off;
        if (RT_FAILURE(rc))
            break;
        if (fEof)
        {
            /* Everything up to the end of the stream checked out. */
            rc = VINF_SUCCESS;
            break;
        }

        /*
         * Move the unprocessed tail to the front of the buffer.
         */
        cbUsed -= off;
        if (off && cbUsed)
            memmove(achBuf, &achBuf[off], cbUsed);
    }

    /*
     * Set the offset on failure.
     */
    if (poffError && RT_FAILURE(rc))
        *poffError = offBuf;

    return rc == VINF_EOF ? VINF_SUCCESS : rc;
}