Exemplo n.º 1
0
/**
 * Performs a case insensitive string compare between two UTF-8 strings.
 *
 * This is a simplified compare, as only the simplified lower/upper case folding
 * specified by the unicode specs are used. It does not consider character pairs
 * as they are used in some languages, just simple upper & lower case compares.
 *
 * The result is the difference between the mismatching codepoints after they
 * both have been lower cased.
 *
 * If the string encoding is invalid the function will assert (strict builds)
 * and use RTStrCmp for the remainder of the string.
 *
 * @returns < 0 if the first string less than the second string.
 * @returns 0 if the first string identical to the second string.
 * @returns > 0 if the first string greater than the second string.
 * @param   psz1        First UTF-8 string. Null is allowed.
 * @param   psz2        Second UTF-8 string. Null is allowed.
 */
RTDECL(int) RTStrICmp(const char *psz1, const char *psz2)
{
    if (psz1 == psz2)
        return 0;
    if (!psz1)
        return -1;
    if (!psz2)
        return 1;

    const char *pszStart1 = psz1;
    for (;;)
    {
        /* Get the codepoints */
        RTUNICP uc1;
        int rc = RTStrGetCpEx(&psz1, &uc1);
        if (RT_FAILURE(rc))
        {
            AssertRC(rc);
            psz1--;
            break;
        }

        RTUNICP uc2;
        rc = RTStrGetCpEx(&psz2, &uc2);
        if (RT_FAILURE(rc))
        {
            AssertRC(rc);
            psz2--;
            psz1 = RTStrPrevCp(pszStart1, psz1);
            break;
        }

        /* compare */
        int iDiff = uc1 - uc2;
        if (iDiff)
        {
            iDiff = RTUniCpToUpper(uc1) != RTUniCpToUpper(uc2);
            if (iDiff)
            {
                iDiff = RTUniCpToLower(uc1) - RTUniCpToLower(uc2); /* lower case diff last! */
                if (iDiff)
                    return iDiff;
            }
        }

        /* hit the terminator? */
        if (!uc1)
            return 0;
    }

    /* Hit some bad encoding, continue in case sensitive mode. */
    return RTStrCmp(psz1, psz2);
}
/**
 * Validates the name entry.
 *
 * @returns IPRT status code.
 * @param   pszEntry            The entry name to validate.
 * @param   pfNeedNormalization Where to return whether it needs normalization
 *                              or not.  Optional.
 * @param   pcchEntry           Where to return the length.  Optional.
 */
static int rtManifestValidateNameEntry(const char *pszEntry, bool *pfNeedNormalization, size_t *pcchEntry)
{
    int         rc;
    bool        fNeedNormalization = false;
    const char *pszCur             = pszEntry;

    for (;;)
    {
        RTUNICP uc;
        rc = RTStrGetCpEx(&pszCur, &uc);
        if (RT_FAILURE(rc))
            return rc;
        if (!uc)
            break;
        if (uc == '\\')
            fNeedNormalization = true;
        else if (uc < 32 || uc == ':' || uc == '(' || uc == ')')
            return VERR_INVALID_NAME;
    }

    if (pfNeedNormalization)
        *pfNeedNormalization = fNeedNormalization;

    size_t cchEntry = pszCur - pszEntry - 1;
    if (!cchEntry)
        rc = VERR_INVALID_NAME;
    if (pcchEntry)
        *pcchEntry = cchEntry;

    return rc;
}
Exemplo n.º 3
0
RTDECL(int) RTUtf16CmpUtf8(PCRTUTF16 pwsz1, const char *psz2)
{
    /*
     * NULL and empty strings are all the same.
     */
    if (!pwsz1)
        return !psz2 || !*psz2 ? 0 : -1;
    if (!psz2)
        return !*pwsz1         ? 0 :  1;

    /*
     * Compare with a UTF-8 string by enumerating them char by char.
     */
    for (;;)
    {
        RTUNICP uc1;
        int rc = RTUtf16GetCpEx(&pwsz1, &uc1);
        AssertRCReturn(rc, 1);

        RTUNICP uc2;
        rc = RTStrGetCpEx(&psz2, &uc2);
        AssertRCReturn(rc, -1);
        if (uc1 == uc2)
        {
            if (uc1)
                continue;
            return 0;
        }
        return uc1 < uc2 ? -1 : 1;
    }
}
Exemplo n.º 4
0
RTDECL(char *) RTStrToUpper(char *psz)
{
    /*
     * Loop the code points in the string, converting them one by one.
     *
     * ASSUMES that the folded code points have an encoding that is equal or
     *         shorter than the original (this is presently correct).
     */
    const char *pszSrc = psz;
    char       *pszDst = psz;
    RTUNICP     uc;
    do
    {
        int rc = RTStrGetCpEx(&pszSrc, &uc);
        if (RT_SUCCESS(rc))
        {
            uc = RTUniCpToUpper(uc);
            pszDst = RTStrPutCp(pszDst, uc);
        }
        else
        {
            /* bad encoding, just copy it quietly (uc == RTUNICP_INVALID (!= 0)). */
            AssertRC(rc);
            *pszDst++ = pszSrc[-1];
        }
        Assert((uintptr_t)pszDst <= (uintptr_t)pszSrc);
    } while (uc != 0);

    return psz;
}
RTCString RTCString::substrCP(size_t pos /*= 0*/, size_t n /*= npos*/) const
{
    RTCString ret;

    if (n)
    {
        const char *psz;

        if ((psz = c_str()))
        {
            RTUNICP cp;

            // walk the UTF-8 characters until where the caller wants to start
            size_t i = pos;
            while (*psz && i--)
                if (RT_FAILURE(RTStrGetCpEx(&psz, &cp)))
                    return ret;     // return empty string on bad encoding

            const char *pFirst = psz;

            if (n == npos)
                // all the rest:
                ret = pFirst;
            else
            {
                i = n;
                while (*psz && i--)
                    if (RT_FAILURE(RTStrGetCpEx(&psz, &cp)))
                        return ret;     // return empty string on bad encoding

                size_t cbCopy = psz - pFirst;
                if (cbCopy)
                {
                    ret.reserve(cbCopy + 1); // may throw bad_alloc
#ifndef RT_EXCEPTIONS_ENABLED
                    AssertRelease(capacity() >= cbCopy + 1);
#endif
                    memcpy(ret.m_psz, pFirst, cbCopy);
                    ret.m_cch = cbCopy;
                    ret.m_psz[cbCopy] = '\0';
                }
            }
        }
    }

    return ret;
}
Exemplo n.º 6
0
/**
 * Look for an unicode code point in the separator string.
 *
 * @returns true if it's a separator, false if it isn't.
 * @param   Cp              The code point.
 * @param   pszSeparators   The separators.
 */
static bool rtGetOptIsUniCpInString(RTUNICP Cp, const char *pszSeparators)
{
    /* This could be done in a more optimal fashion.  Probably worth a
       separate RTStr function at some point. */
    for (;;)
    {
        RTUNICP CpSep;
        int rc = RTStrGetCpEx(&pszSeparators, &CpSep);
        AssertRCReturn(rc, false);
        if (CpSep == Cp)
            return true;
        if (!CpSep)
            return false;
    }
}
Exemplo n.º 7
0
/**
 * Filter a the filename in the against a filter.
 *
 * @returns true if the name matches the filter.
 * @returns false if the name doesn't match filter.
 * @param   pDir        The directory handle.
 * @param   pszName     The path to match to the filter.
 */
static DECLCALLBACK(bool) rtDirFilterWinNtMatchNoWildcards(PRTDIR pDir, const char *pszName)
{
    /*
     * Walk the string and compare.
     */
    PCRTUNICP   pucFilter = pDir->puszFilter;
    const char *psz = pszName;
    RTUNICP     uc;
    do
    {
        int rc = RTStrGetCpEx(&psz, &uc);
        AssertRCReturn(rc, false);
        RTUNICP ucFilter = *pucFilter++;
        if (    uc != ucFilter
            &&  RTUniCpToUpper(uc) != ucFilter)
            return false;
    } while (uc);
    return true;
}
Exemplo n.º 8
0
/**
 * Skips any delimiters at the start of the string that is pointed to.
 *
 * @returns VINF_SUCCESS or RTStrGetCpEx status code.
 * @param   ppszSrc         Where to get and return the string pointer.
 * @param   pszSeparators   The separators.
 * @param   cchSeparators   The length of @a pszSeparators.
 */
static int rtGetOptSkipDelimiters(const char **ppszSrc, const char *pszSeparators, size_t cchSeparators)
{
    const char *pszSrc = *ppszSrc;
    const char *pszRet;
    for (;;)
    {
        pszRet = pszSrc;
        RTUNICP Cp;
        int rc = RTStrGetCpEx(&pszSrc, &Cp);
        if (RT_FAILURE(rc))
        {
            *ppszSrc = pszRet;
            return rc;
        }
        if (   !Cp
                || !rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
            break;
    }

    *ppszSrc = pszRet;
    return VINF_SUCCESS;
}
Exemplo n.º 9
0
/**
 * Filter a the filename in the against a filter.
 *
 * The rules are as follows:
 *      '?'     Matches exactly one char.
 *      '*'     Matches zero or more chars.
 *      '<'     The dos star, matches zero or more chars except the DOS dot.
 *      '>'     The dos question mark, matches one char, but dots and end-of-name eats them.
 *      '"'     The dos dot, matches a dot or end-of-name.
 *
 * @returns true if the name matches the filter.
 * @returns false if the name doesn't match filter.
 * @param   iDepth      The recursion depth.
 * @param   pszName     The path to match to the filter.
 * @param   puszFilter  The filter string.
 */
static bool rtDirFilterWinNtMatchBase(unsigned iDepth, const char *pszName, PCRTUNICP puszFilter)
{
    AssertReturn(iDepth++ < 256, false);

    /*
     * Walk the string and match it up char by char.
     */
    RTUNICP uc;
    do
    {
        RTUNICP ucFilter = *puszFilter++;
        int rc = RTStrGetCpEx(&pszName, &uc); AssertRCReturn(rc, false);
        switch (ucFilter)
        {
            /* Exactly one char. */
            case '?':
                if (!uc)
                    return false;
                break;

            /* One char, but the dos dot and end-of-name eats '>' and '<'. */
            case '>': /* dos ? */
                if (!uc)
                    return rtDirFilterWinNtMatchEon(puszFilter);
                if (uc == '.')
                {
                    while ((ucFilter = *puszFilter) == '>' || ucFilter == '<')
                        puszFilter++;
                    if (ucFilter == '"' || ucFilter == '.')  /* not 100% sure about the last dot */
                        ++puszFilter;
                    else /* the does question mark doesn't match '.'s, so backtrack. */
                        pszName = RTStrPrevCp(NULL, pszName);
                }
                break;

            /* Match a dot or the end-of-name. */
            case '"': /* dos '.' */
                if (uc != '.')
                {
                    if (uc)
                        return false;
                    return rtDirFilterWinNtMatchEon(puszFilter);
                }
                break;

            /* zero or more */
            case '*':
                return rtDirFilterWinNtMatchStar(iDepth, uc, pszName, puszFilter);
            case '<': /* dos '*' */
                return rtDirFilterWinNtMatchDosStar(iDepth, uc, pszName, puszFilter);


            /* uppercased match */
            default:
            {
                if (RTUniCpToUpper(uc) != ucFilter)
                    return false;
                break;
            }
        }
    } while (uc);

    return true;
}
Exemplo n.º 10
0
/**
 * Recursive star matching.
 *
 * @returns true on match.
 * @returns false on miss.
 */
static bool rtDirFilterWinNtMatchStar(unsigned iDepth, RTUNICP uc, const char *pszNext, PCRTUNICP puszFilter)
{
    AssertReturn(iDepth++ < 256, false);

    /*
     * Inspect the next filter char(s) until we find something to work on.
     */
    for (;;)
    {
        RTUNICP ucFilter = *puszFilter++;
        switch (ucFilter)
        {
            /*
             * The star expression is the last in the pattern.
             * Cool, that means we're done!
             */
            case '\0':
                return true;

            /*
             * Just in case (doubt we ever get here), just merge it with the current one.
             */
            case '*':
                break;

            /*
             * Skip a fixed number of chars.
             * Figure out how many by walking the filter ignoring '*'s.
             */
            case '?':
            {
                unsigned cQms = 1;
                while ((ucFilter = *puszFilter) == '*' || ucFilter == '?')
                {
                    cQms += ucFilter == '?';
                    puszFilter++;
                }
                do
                {
                    if (!uc)
                        return false;
                    int rc = RTStrGetCpEx(&pszNext, &uc); AssertRCReturn(rc, false);
                } while (--cQms > 0);
                /* done? */
                if (!ucFilter)
                    return true;
                break;
            }

            /*
             * The simple way is to try char by char and match the remaining
             * expression. If it's trailing we're done.
             */
            case '>': /* dos question mark */
            {
                if (rtDirFilterWinNtMatchEon(puszFilter))
                    return true;
                const char *pszStart = pszNext;
                do
                {
                    if (rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter))
                        return true;
                    int rc = RTStrGetCpEx(&pszNext, &uc); AssertRCReturn(rc, false);
                } while (uc);

                /* backtrack and do the current char. */
                pszNext = RTStrPrevCp(NULL, pszStart); AssertReturn(pszNext, false);
                return rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter);
            }

            /*
             * This bugger is interesting.
             * Time for brute force. Iterate the name char by char.
             */
            case '<':
            {
                do
                {
                    if (rtDirFilterWinNtMatchDosStar(iDepth, uc, pszNext, puszFilter))
                        return true;
                    int rc = RTStrGetCpEx(&pszNext, &uc); AssertRCReturn(rc, false);
                } while (uc);
                return false;
            }

            /*
             * This guy matches a '.' or the end of the name.
             * It's very simple if the rest of the filter expression also matches eon.
             */
            case '"':
                if (rtDirFilterWinNtMatchEon(puszFilter))
                    return true;
                ucFilter = '.';
                /* fall thru */

            /*
             * Ok, we've got zero or more characters.
             * We'll try match starting at each occurrence of this character.
             */
            default:
            {
                do
                {
                    if (    RTUniCpToUpper(uc) == ucFilter
                        &&  rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter))
                        return true;
                    int rc = RTStrGetCpEx(&pszNext, &uc); AssertRCReturn(rc, false);
                } while (uc);
                return false;
            }
        }
    } /* for (;;) */

    /* won't ever get here! */
}
Exemplo n.º 11
0
/**
 * Recursive star matching.
 * Practically the same as normal star, except that the dos star stops
 * when hitting the last dot.
 *
 * @returns true on match.
 * @returns false on miss.
 */
static bool rtDirFilterWinNtMatchDosStar(unsigned iDepth, RTUNICP uc, const char *pszNext, PCRTUNICP puszFilter)
{
    AssertReturn(iDepth++ < 256, false);

    /*
     * If there is no dos star, we should work just like the NT star.
     * Since that's generally faster algorithms, we jump down to there if we can.
     */
    const char *pszDosDot = strrchr(pszNext, '.');
    if (!pszDosDot && uc == '.')
        pszDosDot = pszNext - 1;
    if (!pszDosDot)
        return rtDirFilterWinNtMatchStar(iDepth, uc, pszNext, puszFilter);

    /*
     * Inspect the next filter char(s) until we find something to work on.
     */
    RTUNICP ucFilter = *puszFilter++;
    switch (ucFilter)
    {
        /*
         * The star expression is the last in the pattern.
         * We're fine if the name ends with a dot.
         */
        case '\0':
            return !pszDosDot[1];

        /*
         * Simplified by brute force.
         */
        case '>': /* dos question mark */
        case '?':
        case '*':
        case '<': /* dos star */
        case '"': /* dos dot */
        {
            puszFilter--;
            const char *pszStart = pszNext;
            do
            {
                if (rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter))
                    return true;
                int rc = RTStrGetCpEx(&pszNext, &uc); AssertRCReturn(rc, false);
            } while ((intptr_t)pszDosDot - (intptr_t)pszNext >= -1);

            /* backtrack and do the current char. */
            pszNext = RTStrPrevCp(NULL, pszStart); AssertReturn(pszNext, false);
            return rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter);
        }

        /*
         * Ok, we've got zero or more characters.
         * We'll try match starting at each occurrence of this character.
         */
        default:
        {
            if (    RTUniCpToUpper(uc) == ucFilter
                &&  rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter))
                return true;
            do
            {
                int rc = RTStrGetCpEx(&pszNext, &uc); AssertRCReturn(rc, false);
                if (    RTUniCpToUpper(uc) == ucFilter
                    &&  rtDirFilterWinNtMatchBase(iDepth, pszNext, puszFilter))
                    return true;
            } while ((intptr_t)pszDosDot - (intptr_t)pszNext >= -1);
            return false;
        }
    }
    /* won't ever get here! */
}
Exemplo n.º 12
0
static void test2(RTTEST hTest)
{
    RTTestSub(hTest, "UTF-8 upper/lower encoding assumption");

#define CHECK_EQUAL(str1, str2) \
    do \
    { \
        RTTESTI_CHECK(strlen((str1).c_str()) == (str1).length()); \
        RTTESTI_CHECK((str1).length() == (str2).length()); \
        RTTESTI_CHECK(mymemcmp((str1).c_str(), (str2).c_str(), (str2).length() + 1) == 0); \
    } while (0)

    RTCString strTmp, strExpect;
    char szDst[16];

    /* Some simple ascii stuff. */
    strTmp    = "abcdefghijklmnopqrstuvwxyz0123456ABCDEFGHIJKLMNOPQRSTUVWXYZ;-+/\\";
    strExpect = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456ABCDEFGHIJKLMNOPQRSTUVWXYZ;-+/\\";
    strTmp.toUpper();
    CHECK_EQUAL(strTmp, strExpect);

    strTmp.toLower();
    strExpect = "abcdefghijklmnopqrstuvwxyz0123456abcdefghijklmnopqrstuvwxyz;-+/\\";
    CHECK_EQUAL(strTmp, strExpect);

    strTmp    = "abcdefghijklmnopqrstuvwxyz0123456ABCDEFGHIJKLMNOPQRSTUVWXYZ;-+/\\";
    strTmp.toLower();
    CHECK_EQUAL(strTmp, strExpect);

    /* Collect all upper and lower case code points. */
    RTCString strLower("");
    strLower.reserve(_4M);

    RTCString strUpper("");
    strUpper.reserve(_4M);

    for (RTUNICP uc = 1; uc <= 0x10fffd; uc++)
    {
        /* Unicode 4.01, I think, introduced a few codepoints with lower/upper mappings
           that aren't up for roundtrips and which case folding has a different UTF-8
           length.  We'll just skip them here as there are very few:
            - Dotless small i and dotless capital I folds into ASCII I and i.
            - The small letter long s folds to ASCII S.
            - Greek prosgegrammeni folds to iota, which is a letter with both upper
              and lower case foldings of its own. */
        if (uc == 0x131 || uc == 0x130 || uc == 0x17f || 0x1fbe)
            continue;

        if (RTUniCpIsLower(uc))
        {
            RTTESTI_CHECK_MSG(uc < 0xd800 || (uc > 0xdfff && uc != 0xfffe && uc != 0xffff), ("%#x\n", uc));
            strLower.appendCodePoint(uc);
        }
        if (RTUniCpIsUpper(uc))
        {
            RTTESTI_CHECK_MSG(uc < 0xd800 || (uc > 0xdfff && uc != 0xfffe && uc != 0xffff), ("%#x\n", uc));
            strUpper.appendCodePoint(uc);
        }
    }
    RTTESTI_CHECK(strlen(strLower.c_str()) == strLower.length());
    RTTESTI_CHECK(strlen(strUpper.c_str()) == strUpper.length());

    /* Fold each code point in the lower case string and check that it encodes
       into the same or less number of bytes. */
    size_t      cch    = 0;
    const char *pszCur = strLower.c_str();
    RTCString    strUpper2("");
    strUpper2.reserve(strLower.length() + 64);
    for (;;)
    {
        RTUNICP             ucLower;
        const char * const  pszPrev   = pszCur;
        RTTESTI_CHECK_RC_BREAK(RTStrGetCpEx(&pszCur, &ucLower), VINF_SUCCESS);
        size_t const        cchSrc    = pszCur - pszPrev;
        if (!ucLower)
            break;

        RTUNICP const       ucUpper   = RTUniCpToUpper(ucLower);
        const char         *pszDstEnd = RTStrPutCp(szDst, ucUpper);
        size_t const        cchDst    = pszDstEnd - &szDst[0];
        RTTESTI_CHECK_MSG(cchSrc >= cchDst,
                          ("ucLower=%#x %u bytes;  ucUpper=%#x %u bytes\n",
                           ucLower, cchSrc, ucUpper, cchDst));
        cch += cchDst;
        strUpper2.appendCodePoint(ucUpper);

        /* roundtrip stability */
        RTUNICP const       ucUpper2  = RTUniCpToUpper(ucUpper);
        RTTESTI_CHECK_MSG(ucUpper2 == ucUpper, ("ucUpper2=%#x ucUpper=%#x\n", ucUpper2, ucUpper));

        RTUNICP const       ucLower2  = RTUniCpToLower(ucUpper);
        RTTESTI_CHECK_MSG(ucLower2 == ucLower, ("ucLower2=%#x ucLower=%#x\n", ucLower2, ucLower));
        RTUNICP const       ucUpper3  = RTUniCpToUpper(ucLower2);
        RTTESTI_CHECK_MSG(ucUpper3 == ucUpper, ("ucUpper3=%#x ucUpper=%#x\n", ucUpper3, ucUpper));

        pszDstEnd = RTStrPutCp(szDst, ucLower2);
        size_t const        cchLower2 = pszDstEnd - &szDst[0];
        RTTESTI_CHECK_MSG(cchDst == cchLower2,
                          ("ucLower2=%#x %u bytes;  ucUpper=%#x %u bytes; ucLower=%#x\n",
                           ucLower2, cchLower2, ucUpper, cchDst, ucLower));
    }
    RTTESTI_CHECK(strlen(strUpper2.c_str()) == strUpper2.length());
    RTTESTI_CHECK_MSG(cch == strUpper2.length(), ("cch=%u length()=%u\n", cch, strUpper2.length()));

    /* the toUpper method shall do the same thing. */
    strTmp = strLower;      CHECK_EQUAL(strTmp, strLower);
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper2);

    /* Ditto for the upper case string. */
    cch    = 0;
    pszCur = strUpper.c_str();
    RTCString    strLower2("");
    strLower2.reserve(strUpper.length() + 64);
    for (;;)
    {
        RTUNICP             ucUpper;
        const char * const  pszPrev   = pszCur;
        RTTESTI_CHECK_RC_BREAK(RTStrGetCpEx(&pszCur, &ucUpper), VINF_SUCCESS);
        size_t const        cchSrc    = pszCur - pszPrev;
        if (!ucUpper)
            break;

        RTUNICP const       ucLower   = RTUniCpToLower(ucUpper);
        const char         *pszDstEnd = RTStrPutCp(szDst, ucLower);
        size_t const        cchDst    = pszDstEnd - &szDst[0];
        RTTESTI_CHECK_MSG(cchSrc >= cchDst,
                          ("ucUpper=%#x %u bytes;  ucLower=%#x %u bytes\n",
                           ucUpper, cchSrc, ucLower, cchDst));

        cch += cchDst;
        strLower2.appendCodePoint(ucLower);

        /* roundtrip stability */
        RTUNICP const       ucLower2  = RTUniCpToLower(ucLower);
        RTTESTI_CHECK_MSG(ucLower2 == ucLower, ("ucLower2=%#x ucLower=%#x\n", ucLower2, ucLower));

        RTUNICP const       ucUpper2  = RTUniCpToUpper(ucLower);
        RTTESTI_CHECK_MSG(ucUpper2 == ucUpper, ("ucUpper2=%#x ucUpper=%#x\n", ucUpper2, ucUpper));
        RTUNICP const       ucLower3  = RTUniCpToLower(ucUpper2);
        RTTESTI_CHECK_MSG(ucLower3 == ucLower, ("ucLower3=%#x ucLower=%#x\n", ucLower3, ucLower));

        pszDstEnd = RTStrPutCp(szDst, ucUpper2);
        size_t const        cchUpper2 = pszDstEnd - &szDst[0];
        RTTESTI_CHECK_MSG(cchDst == cchUpper2,
                          ("ucUpper2=%#x %u bytes;  ucLower=%#x %u bytes\n",
                           ucUpper2, cchUpper2, ucLower, cchDst));
    }
    RTTESTI_CHECK(strlen(strLower2.c_str()) == strLower2.length());
    RTTESTI_CHECK_MSG(cch == strLower2.length(), ("cch=%u length()=%u\n", cch, strLower2.length()));

    strTmp = strUpper;      CHECK_EQUAL(strTmp, strUpper);
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower2);

    /* Checks of folding stability when nothing shall change. */
    strTmp = strUpper;      CHECK_EQUAL(strTmp, strUpper);
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper);
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper);
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper);

    strTmp = strUpper2;     CHECK_EQUAL(strTmp, strUpper2);
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper2);
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper2);
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper2);

    strTmp = strLower;      CHECK_EQUAL(strTmp, strLower);
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower);
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower);
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower);

    strTmp = strLower2;     CHECK_EQUAL(strTmp, strLower2);
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower2);
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower2);
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower2);

    /* Check folding stability for roundtrips. */
    strTmp = strUpper;      CHECK_EQUAL(strTmp, strUpper);
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower2);
    strTmp.toUpper();
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower2);
    strTmp.toUpper();
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower2);

    strTmp = strLower;      CHECK_EQUAL(strTmp, strLower);
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper2);
    strTmp.toLower();
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper2);
    strTmp.toLower();
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper2);
}
Exemplo n.º 13
0
RTDECL(char *) RTStrIStr(const char *pszHaystack, const char *pszNeedle)
{
    /* Any NULL strings means NULL return. (In the RTStrCmp tradition.) */
    if (!pszHaystack)
        return NULL;
    if (!pszNeedle)
        return NULL;

    /* The empty string matches everything. */
    if (!*pszNeedle)
        return (char *)pszHaystack;

    /*
     * The search strategy is to pick out the first char of the needle, fold it,
     * and match it against the haystack code point by code point. When encountering
     * a matching code point we use RTStrNICmp for the remainder (if any) of the needle.
     */
    const char * const pszNeedleStart = pszNeedle;
    RTUNICP Cp0;
    RTStrGetCpEx(&pszNeedle, &Cp0);     /* pszNeedle is advanced one code point. */
    size_t const    cchNeedle   = strlen(pszNeedle);
    size_t const    cchNeedleCp0= pszNeedle - pszNeedleStart;
    RTUNICP const   Cp0Lower    = RTUniCpToLower(Cp0);
    RTUNICP const   Cp0Upper    = RTUniCpToUpper(Cp0);
    if (    Cp0Lower == Cp0Upper
        &&  Cp0Lower == Cp0)
    {
        /* Cp0 is not a case sensitive char. */
        for (;;)
        {
            RTUNICP Cp;
            RTStrGetCpEx(&pszHaystack, &Cp);
            if (!Cp)
                break;
            if (    Cp == Cp0
                &&  !RTStrNICmp(pszHaystack, pszNeedle, cchNeedle))
                return (char *)pszHaystack - cchNeedleCp0;
        }
    }
    else if (   Cp0Lower == Cp0
             || Cp0Upper != Cp0)
    {
        /* Cp0 is case sensitive */
        for (;;)
        {
            RTUNICP Cp;
            RTStrGetCpEx(&pszHaystack, &Cp);
            if (!Cp)
                break;
            if (    (   Cp == Cp0Upper
                     || Cp == Cp0Lower)
                &&  !RTStrNICmp(pszHaystack, pszNeedle, cchNeedle))
                return (char *)pszHaystack - cchNeedleCp0;
        }
    }
    else
    {
        /* Cp0 is case sensitive and folds to two difference chars. (paranoia) */
        for (;;)
        {
            RTUNICP Cp;
            RTStrGetCpEx(&pszHaystack, &Cp);
            if (!Cp)
                break;
            if (    (   Cp == Cp0
                     || Cp == Cp0Upper
                     || Cp == Cp0Lower)
                &&  !RTStrNICmp(pszHaystack, pszNeedle, cchNeedle))
                return (char *)pszHaystack - cchNeedleCp0;
        }
    }


    return NULL;
}
Exemplo n.º 14
0
RTDECL(int) RTGetOptArgvFromString(char ***ppapszArgv, int *pcArgs, const char *pszCmdLine,
                                   uint32_t fFlags, const char *pszSeparators)
{
    /*
     * Some input validation.
     */
    AssertPtr(pszCmdLine);
    AssertPtr(pcArgs);
    AssertPtr(ppapszArgv);
    AssertReturn(   fFlags == RTGETOPTARGV_CNV_QUOTE_BOURNE_SH
                 || fFlags == RTGETOPTARGV_CNV_QUOTE_MS_CRT, VERR_INVALID_FLAGS);
    if (!pszSeparators)
        pszSeparators = " \t\n\r";
    else
        AssertPtr(pszSeparators);
    size_t const cchSeparators = strlen(pszSeparators);
    AssertReturn(cchSeparators > 0, VERR_INVALID_PARAMETER);

    /*
     * Parse the command line and chop off it into argv individual argv strings.
     */
    int         rc        = VINF_SUCCESS;
    const char *pszSrc    = pszCmdLine;
    char       *pszDup    = (char *)RTMemAlloc(strlen(pszSrc) + 1);
    char       *pszDst    = pszDup;
    if (!pszDup)
        return VERR_NO_STR_MEMORY;
    char      **papszArgs = NULL;
    unsigned    iArg      = 0;
    while (*pszSrc)
    {
        /* Skip stuff */
        rc = rtGetOptSkipDelimiters(&pszSrc, pszSeparators, cchSeparators);
        if (RT_FAILURE(rc))
            break;
        if (!*pszSrc)
            break;

        /* Start a new entry. */
        if ((iArg % 32) == 0)
        {
            void *pvNew = RTMemRealloc(papszArgs, (iArg + 33) * sizeof(char *));
            if (!pvNew)
            {
                rc = VERR_NO_MEMORY;
                break;
            }
            papszArgs = (char **)pvNew;
        }
        papszArgs[iArg++] = pszDst;

        /*
         * Parse and copy the string over.
         */
        RTUNICP Cp;
        if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_BOURNE_SH)
        {
            /*
             * Bourne shell style.
             */
            RTUNICP CpQuote = 0;
            for (;;)
            {
                rc = RTStrGetCpEx(&pszSrc, &Cp);
                if (RT_FAILURE(rc) || !Cp)
                    break;
                if (!CpQuote)
                {
                    if (Cp == '"' || Cp == '\'')
                        CpQuote = Cp;
                    else if (rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
                        break;
                    else if (Cp != '\\')
                        pszDst = RTStrPutCp(pszDst, Cp);
                    else
                    {
                        /* escaped char */
                        rc = RTStrGetCpEx(&pszSrc, &Cp);
                        if (RT_FAILURE(rc) || !Cp)
                            break;
                        pszDst = RTStrPutCp(pszDst, Cp);
                    }
                }
                else if (CpQuote != Cp)
                {
                    if (Cp != '\\' || CpQuote == '\'')
                        pszDst = RTStrPutCp(pszDst, Cp);
                    else
                    {
                        /* escaped char */
                        rc = RTStrGetCpEx(&pszSrc, &Cp);
                        if (RT_FAILURE(rc) || !Cp)
                            break;
                        pszDst = RTStrPutCp(pszDst, Cp);
                    }
                }
                else
                    CpQuote = 0;
            }
        }
        else
        {
            /*
             * Microsoft CRT style.
             */
            Assert((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_MS_CRT);
            bool fInQuote = false;
            for (;;)
            {
                rc = RTStrGetCpEx(&pszSrc, &Cp);
                if (RT_FAILURE(rc) || !Cp)
                    break;
                if (Cp == '"')
                    fInQuote = !fInQuote;
                else if (!fInQuote && rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
                    break;
                else if (Cp != '\\')
                    pszDst = RTStrPutCp(pszDst, Cp);
                else
                {
                    /* A backslash sequence is only relevant if followed by
                       a double quote, then it will work like an escape char. */
                    size_t cQuotes = 1;
                    while (*pszSrc == '\\')
                    {
                        cQuotes++;
                        pszSrc++;
                    }
                    if (*pszSrc != '"')
                        /* Not an escape sequence.  */
                        while (cQuotes-- > 0)
                            pszDst = RTStrPutCp(pszDst, '\\');
                    else
                    {
                        /* Escape sequence.  Output half of the slashes.  If odd
                           number, output the escaped double quote . */
                        while (cQuotes >= 2)
                        {
                            pszDst = RTStrPutCp(pszDst, '\\');
                            cQuotes -= 2;
                        }
                        if (!cQuotes)
                            fInQuote = !fInQuote;
                        else
                            pszDst = RTStrPutCp(pszDst, '"');
                        pszSrc++;
                    }
                }
            }
        }

        *pszDst++ = '\0';
        if (RT_FAILURE(rc) || !Cp)
            break;
    }

    if (RT_FAILURE(rc))
    {
        RTMemFree(pszDup);
        RTMemFree(papszArgs);
        return rc;
    }

    /*
     * Terminate the array.
     * Check for empty string to make sure we've got an array.
     */
    if (iArg == 0)
    {
        RTMemFree(pszDup);
        papszArgs = (char **)RTMemAlloc(1 * sizeof(char *));
        if (!papszArgs)
            return VERR_NO_MEMORY;
    }
    papszArgs[iArg] = NULL;

    *pcArgs     = iArg;
    *ppapszArgv = papszArgs;
    return VINF_SUCCESS;
}
Exemplo n.º 15
0
RTDECL(int) RTGetOptArgvFromString(char ***ppapszArgv, int *pcArgs, const char *pszCmdLine, const char *pszSeparators)
{
    /*
     * Some input validation.
     */
    AssertPtr(pszCmdLine);
    AssertPtr(pcArgs);
    AssertPtr(ppapszArgv);
    if (!pszSeparators)
        pszSeparators = " \t\n\r";
    else
        AssertPtr(pszSeparators);
    size_t const cchSeparators = strlen(pszSeparators);
    AssertReturn(cchSeparators > 0, VERR_INVALID_PARAMETER);

    /*
     * Parse the command line and chop off it into argv individual argv strings.
     */
    int         rc        = VINF_SUCCESS;
    const char *pszSrc    = pszCmdLine;
    char       *pszDup    = (char *)RTMemAlloc(strlen(pszSrc) + 1);
    char       *pszDst    = pszDup;
    if (!pszDup)
        return VERR_NO_STR_MEMORY;
    char      **papszArgs = NULL;
    unsigned    iArg      = 0;
    while (*pszSrc)
    {
        /* Skip stuff */
        rc = rtGetOptSkipDelimiters(&pszSrc, pszSeparators, cchSeparators);
        if (RT_FAILURE(rc))
            break;
        if (!*pszSrc)
            break;

        /* Start a new entry. */
        if ((iArg % 32) == 0)
        {
            void *pvNew = RTMemRealloc(papszArgs, (iArg + 33) * sizeof(char *));
            if (!pvNew)
            {
                rc = VERR_NO_MEMORY;
                break;
            }
            papszArgs = (char **)pvNew;
        }
        papszArgs[iArg++] = pszDst;

        /* Parse and copy the string over. */
        RTUNICP CpQuote = 0;
        RTUNICP Cp;
        for (;;)
        {
            rc = RTStrGetCpEx(&pszSrc, &Cp);
            if (RT_FAILURE(rc) || !Cp)
                break;
            if (!CpQuote)
            {
                if (Cp == '"' || Cp == '\'')
                    CpQuote = Cp;
                else if (rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
                    break;
                else
                    pszDst = RTStrPutCp(pszDst, Cp);
            }
            else if (CpQuote != Cp)
                pszDst = RTStrPutCp(pszDst, Cp);
            else
                CpQuote = 0;
        }
        *pszDst++ = '\0';
        if (RT_FAILURE(rc) || !Cp)
            break;
    }

    if (RT_FAILURE(rc))
    {
        RTMemFree(pszDup);
        RTMemFree(papszArgs);
        return rc;
    }

    /*
     * Terminate the array.
     * Check for empty string to make sure we've got an array.
     */
    if (iArg == 0)
    {
        RTMemFree(pszDup);
        papszArgs = (char **)RTMemAlloc(1 * sizeof(char *));
        if (!papszArgs)
            return VERR_NO_MEMORY;
    }
    papszArgs[iArg] = NULL;

    *pcArgs     = iArg;
    *ppapszArgv = papszArgs;
    return VINF_SUCCESS;
}
Exemplo n.º 16
0
/**
 * Internal write API, stream lock already held.
 *
 * @returns IPRT status code.
 * @param   pStream             The stream.
 * @param   pvBuf               What to write.
 * @param   cbWrite             How much to write.
 * @param   pcbWritten          Where to optionally return the number of bytes
 *                              written.
 * @param   fSureIsText         Set if we're sure this is UTF-8 text already.
 */
static int rtStrmWriteLocked(PRTSTREAM pStream, const void *pvBuf, size_t cbWrite, size_t *pcbWritten,
                              bool fSureIsText)
{
    int rc = pStream->i32Error;
    if (RT_FAILURE(rc))
        return rc;
    if (pStream->fRecheckMode)
        rtStreamRecheckMode(pStream);

#ifdef RT_OS_WINDOWS
    /*
     * Use the unicode console API when possible in order to avoid stuff
     * getting lost in unnecessary code page translations.
     */
    HANDLE hCon;
    if (rtStrmIsConsoleUnlocked(pStream, &hCon))
    {
# ifdef HAVE_FWRITE_UNLOCKED
        if (!fflush_unlocked(pStream->pFile))
# else
        if (!fflush(pStream->pFile))
# endif
        {
            /** @todo Consider buffering later. For now, we'd rather correct output than
             *        fast output. */
            DWORD    cwcWritten = 0;
            PRTUTF16 pwszSrc = NULL;
            size_t   cwcSrc = 0;
            rc = RTStrToUtf16Ex((const char *)pvBuf, cbWrite, &pwszSrc, 0, &cwcSrc);
            if (RT_SUCCESS(rc))
            {
                if (!WriteConsoleW(hCon, pwszSrc, (DWORD)cwcSrc, &cwcWritten, NULL))
                {
                    /* try write char-by-char to avoid heap problem. */
                    cwcWritten = 0;
                    while (cwcWritten != cwcSrc)
                    {
                        DWORD cwcThis;
                        if (!WriteConsoleW(hCon, &pwszSrc[cwcWritten], 1, &cwcThis, NULL))
                        {
                            if (!pcbWritten || cwcWritten == 0)
                                rc = RTErrConvertFromErrno(GetLastError());
                            break;
                        }
                        if (cwcThis != 1) /* Unable to write current char (amount)? */
                            break;
                        cwcWritten++;
                    }
                }
                if (RT_SUCCESS(rc))
                {
                    if (cwcWritten == cwcSrc)
                    {
                        if (pcbWritten)
                            *pcbWritten = cbWrite;
                    }
                    else if (pcbWritten)
                    {
                        PCRTUTF16   pwszCur = pwszSrc;
                        const char *pszCur  = (const char *)pvBuf;
                        while ((uintptr_t)(pwszCur - pwszSrc) < cwcWritten)
                        {
                            RTUNICP CpIgnored;
                            RTUtf16GetCpEx(&pwszCur, &CpIgnored);
                            RTStrGetCpEx(&pszCur, &CpIgnored);
                        }
                        *pcbWritten = pszCur - (const char *)pvBuf;
                    }
                    else
                        rc = VERR_WRITE_ERROR;
                }
                RTUtf16Free(pwszSrc);
            }
        }
        else
            rc = RTErrConvertFromErrno(errno);
        if (RT_FAILURE(rc))
            ASMAtomicWriteS32(&pStream->i32Error, rc);
        return rc;
    }
#endif /* RT_OS_WINDOWS */

    /*
     * If we're sure it's text output, convert it from UTF-8 to the current
     * code page before printing it.
     *
     * Note! Partial writes are not supported in this scenario because we
     *       cannot easily report back a written length matching the input.
     */
    /** @todo Skip this if the current code set is UTF-8. */
    if (   pStream->fCurrentCodeSet
        && !pStream->fBinary
        && (   fSureIsText
            || rtStrmIsUtf8Text(pvBuf, cbWrite))
       )
    {
        char       *pszSrcFree = NULL;
        const char *pszSrc     = (const char *)pvBuf;
        if (pszSrc[cbWrite])
        {
            pszSrc = pszSrcFree = RTStrDupN(pszSrc, cbWrite);
            if (pszSrc == NULL)
                rc = VERR_NO_STR_MEMORY;
        }
        if (RT_SUCCESS(rc))
        {
            char *pszSrcCurCP;
            rc = RTStrUtf8ToCurrentCP(&pszSrcCurCP, pszSrc);
            if (RT_SUCCESS(rc))
            {
                size_t  cchSrcCurCP = strlen(pszSrcCurCP);
                IPRT_ALIGNMENT_CHECKS_DISABLE(); /* glibc / mempcpy again */
#ifdef HAVE_FWRITE_UNLOCKED
                ssize_t cbWritten = fwrite_unlocked(pszSrcCurCP, cchSrcCurCP, 1, pStream->pFile);
#else
                ssize_t cbWritten = fwrite(pszSrcCurCP, cchSrcCurCP, 1, pStream->pFile);
#endif
                IPRT_ALIGNMENT_CHECKS_ENABLE();
                if (cbWritten == 1)
                {
                    if (pcbWritten)
                        *pcbWritten = cbWrite;
                }
#ifdef HAVE_FWRITE_UNLOCKED
                else if (!ferror_unlocked(pStream->pFile))
#else
                else if (!ferror(pStream->pFile))
#endif
                {
                    if (pcbWritten)
                        *pcbWritten = 0;
                }
                else
                    rc = VERR_WRITE_ERROR;
                RTStrFree(pszSrcCurCP);
            }
            RTStrFree(pszSrcFree);
        }

        if (RT_FAILURE(rc))
            ASMAtomicWriteS32(&pStream->i32Error, rc);
        return rc;
    }
RTDECL(int) RTVfsIoStrmValidateUtf8Encoding(RTVFSIOSTREAM hVfsIos, uint32_t fFlags, PRTFOFF poffError)
{
    /*
     * Validate input.
     */
    if (poffError)
    {
        AssertPtrReturn(poffError, VINF_SUCCESS);
        *poffError = 0;
    }
    AssertReturn(!(fFlags & ~RTVFS_VALIDATE_UTF8_VALID_MASK), VERR_INVALID_PARAMETER);

    /*
     * The loop.
     */
    char    achBuf[1024 + 1];
    size_t  cbUsed = 0;
    int     rc;
    for (;;)
    {
        /*
         * Fill the buffer
         */
        size_t cbRead = 0;
        rc = RTVfsIoStrmRead(hVfsIos, &achBuf[cbUsed], sizeof(achBuf) - cbUsed - 1, true /*fBlocking*/, &cbRead);
        if (RT_FAILURE(rc))
            break;
        cbUsed += cbRead;
        if (!cbUsed)
        {
            Assert(rc == VINF_EOF);
            break;
        }
        achBuf[sizeof(achBuf) - 1] = '\0';

        /*
         * Process the data in the buffer, maybe leaving the final chars till
         * the next round.
         */
        const char *pszCur = achBuf;
        size_t      offEnd = rc == VINF_EOF
                           ? cbUsed
                           : cbUsed >= 7
                           ? cbUsed - 7
                           : 0;
        size_t      off;
        while ((off = (pszCur - &achBuf[0])) < offEnd)
        {
            RTUNICP uc;
            rc = RTStrGetCpEx(&pszCur, &uc);
            if (RT_FAILURE(rc))
                break;
            if (!uc)
            {
                if (fFlags & RTVFS_VALIDATE_UTF8_NO_NULL)
                {
                    rc = VERR_INVALID_UTF8_ENCODING;
                    break;
                }
            }
            else if (uc > 0x10ffff)
            {
                if (fFlags & RTVFS_VALIDATE_UTF8_BY_RTC_3629)
                {
                    rc = VERR_INVALID_UTF8_ENCODING;
                    break;
                }
            }
        }

        if (off < cbUsed)
        {
            cbUsed -= off;
            memmove(achBuf, pszCur, cbUsed);
        }
    }

    /*
     * Set the offset on failure.
     */
    if (poffError && RT_FAILURE(rc))
    {
    }

    return rc == VINF_EOF ? VINF_SUCCESS : rc;
}