Esempio n. 1
0
/**
 * Folds a zero terminated UTF-8 string to upper case, in place.
 *
 * The string is walked one code point at a time with separate read and write
 * cursors over the same buffer.  A code point is only replaced by its upper
 * case form when that form encodes to the same number of UTF-8 bytes;
 * otherwise the original code point is written back unchanged.  Bytes that
 * fail to decode are copied through as-is (after asserting in strict builds).
 *
 * @returns psz (the buffer that was passed in).
 * @param   psz     The string to convert; modified in place.
 */
RTDECL(char *) RTStrToUpper(char *psz)
{
    const char *pszRead  = psz;
    char       *pszWrite = psz;
    for (;;)
    {
        RTUNICP ucIn;
        int rc = RTStrGetCpEx(&pszRead, &ucIn);
        if (RT_FAILURE(rc))
        {
            /* Undecodable input: pass the offending byte through untouched.
               ucIn is expected to be RTUNICP_INVALID here, i.e. non-zero, so
               the loop keeps going. */
            AssertRC(rc);
            *pszWrite++ = pszRead[-1];
        }
        else
        {
            /* Keep the original code point if folding would change the
               encoded length, so the write cursor stays in step. */
            RTUNICP ucOut = RTUniCpToUpper(ucIn);
            if (RT_UNLIKELY(   ucOut != ucIn
                            && RTUniCpCalcUtf8Len(ucOut) != RTUniCpCalcUtf8Len(ucIn)))
                ucOut = ucIn;
            pszWrite = RTStrPutCp(pszWrite, ucOut);
        }

        /* The writer must never overtake the reader. */
        Assert((uintptr_t)pszWrite <= (uintptr_t)pszRead);

        /* The terminator has been copied too once we see code point zero. */
        if (ucIn == 0)
            break;
    }

    return psz;
}
/**
 * Appends a single Unicode code point to the string, UTF-8 encoded.
 *
 * @returns Reference to this string.
 * @param   uc      The code point to append.  Values above 0x7fffffff are
 *                  rejected (assertion) and leave the string unchanged.
 */
RTCString &RTCString::appendCodePoint(RTUNICP uc)
{
    if (uc >= 0x80)
    {
        /*
         * Multibyte sequence.  Rather than working out the exact encoded
         * length, make room for the longest possible sequence (6 bytes)
         * plus the terminator.
         */
        AssertReturn(uc <= UINT32_C(0x7fffffff), *this);

        if (m_cch + 6 >= m_cbAllocated)
        {
            /* reserve() reallocates and updates m_cbAllocated; with exceptions
               enabled it may throw std::bad_alloc instead of returning. */
            reserve(RT_ALIGN_Z(m_cch + 6 + 1, IPRT_MINISTRING_APPEND_ALIGNMENT));
#ifndef RT_EXCEPTIONS_ENABLED
            AssertRelease(capacity() > m_cch + 6);
#endif
        }

        /* Encode at the current end, then fix up length and terminator. */
        char *pszEnd = RTStrPutCp(&m_psz[m_cch], uc);
        m_cch = pszEnd - m_psz;
        *pszEnd = '\0';

        return *this;
    }

    /* Plain 7-bit character: the single char append does the job. */
    return RTCString::append((char)uc);
}
Esempio n. 3
0
/**
 * Verifies the assumption RTStrToUpper/RTStrToLower (and thereby
 * RTCString::toUpper/toLower) rely on: that folding the case of a code point
 * never yields a longer UTF-8 encoding, and that folding is stable across
 * round trips.
 *
 * The test first checks plain ASCII, then collects every lower and upper case
 * code point (minus a few known non-roundtrip exceptions) into two strings,
 * folds them code point by code point, and compares the result with what the
 * RTCString methods produce.
 *
 * @param   hTest   The test handle, used for starting the sub-test.
 */
static void test2(RTTEST hTest)
{
    RTTestSub(hTest, "UTF-8 upper/lower encoding assumption");

/* Checks that two RTCString objects are byte-for-byte equal (terminator
   included) and that the first one's length matches its C string length. */
#define CHECK_EQUAL(str1, str2) \
    do \
    { \
        RTTESTI_CHECK(strlen((str1).c_str()) == (str1).length()); \
        RTTESTI_CHECK((str1).length() == (str2).length()); \
        RTTESTI_CHECK(mymemcmp((str1).c_str(), (str2).c_str(), (str2).length() + 1) == 0); \
    } while (0)

    RTCString strTmp, strExpect;
    char szDst[16];

    /* Some simple ascii stuff. */
    strTmp    = "abcdefghijklmnopqrstuvwxyz0123456ABCDEFGHIJKLMNOPQRSTUVWXYZ;-+/\\";
    strExpect = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456ABCDEFGHIJKLMNOPQRSTUVWXYZ;-+/\\";
    strTmp.toUpper();
    CHECK_EQUAL(strTmp, strExpect);

    strTmp.toLower();
    strExpect = "abcdefghijklmnopqrstuvwxyz0123456abcdefghijklmnopqrstuvwxyz;-+/\\";
    CHECK_EQUAL(strTmp, strExpect);

    strTmp    = "abcdefghijklmnopqrstuvwxyz0123456ABCDEFGHIJKLMNOPQRSTUVWXYZ;-+/\\";
    strTmp.toLower();
    CHECK_EQUAL(strTmp, strExpect);

    /* Collect all upper and lower case code points. */
    RTCString strLower("");
    strLower.reserve(_4M);

    RTCString strUpper("");
    strUpper.reserve(_4M);

    for (RTUNICP uc = 1; uc <= 0x10fffd; uc++)
    {
        /* Unicode 4.01, I think, introduced a few codepoints with lower/upper mappings
           that aren't up for roundtrips and which case folding has a different UTF-8
           length.  We'll just skip them here as there are very few:
            - U+0131 (dotless small i) and U+0130 (capital I with dot above) fold
              into ASCII I and i.
            - U+017F (small letter long s) folds to ASCII S.
            - U+1FBE (Greek prosgegrammeni) folds to iota, which is a letter with
              both upper and lower case foldings of its own.
           Every exclusion has to be an explicit comparison against uc; a bare
           constant in the || chain is always true and would skip every single
           code point, leaving the two strings empty and the test vacuous. */
        if (uc == 0x131 || uc == 0x130 || uc == 0x17f || uc == 0x1fbe)
            continue;

        if (RTUniCpIsLower(uc))
        {
            RTTESTI_CHECK_MSG(uc < 0xd800 || (uc > 0xdfff && uc != 0xfffe && uc != 0xffff), ("%#x\n", uc));
            strLower.appendCodePoint(uc);
        }
        if (RTUniCpIsUpper(uc))
        {
            RTTESTI_CHECK_MSG(uc < 0xd800 || (uc > 0xdfff && uc != 0xfffe && uc != 0xffff), ("%#x\n", uc));
            strUpper.appendCodePoint(uc);
        }
    }
    RTTESTI_CHECK(strlen(strLower.c_str()) == strLower.length());
    RTTESTI_CHECK(strlen(strUpper.c_str()) == strUpper.length());

    /* Fold each code point in the lower case string and check that it encodes
       into the same or less number of bytes. */
    size_t      cch    = 0;
    const char *pszCur = strLower.c_str();
    RTCString    strUpper2("");
    strUpper2.reserve(strLower.length() + 64);
    for (;;)
    {
        RTUNICP             ucLower;
        const char * const  pszPrev   = pszCur;
        RTTESTI_CHECK_RC_BREAK(RTStrGetCpEx(&pszCur, &ucLower), VINF_SUCCESS);
        size_t const        cchSrc    = pszCur - pszPrev;
        if (!ucLower)
            break;

        RTUNICP const       ucUpper   = RTUniCpToUpper(ucLower);
        const char         *pszDstEnd = RTStrPutCp(szDst, ucUpper);
        size_t const        cchDst    = pszDstEnd - &szDst[0];
        RTTESTI_CHECK_MSG(cchSrc >= cchDst,
                          ("ucLower=%#x %u bytes;  ucUpper=%#x %u bytes\n",
                           ucLower, cchSrc, ucUpper, cchDst));
        cch += cchDst;
        strUpper2.appendCodePoint(ucUpper);

        /* roundtrip stability */
        RTUNICP const       ucUpper2  = RTUniCpToUpper(ucUpper);
        RTTESTI_CHECK_MSG(ucUpper2 == ucUpper, ("ucUpper2=%#x ucUpper=%#x\n", ucUpper2, ucUpper));

        RTUNICP const       ucLower2  = RTUniCpToLower(ucUpper);
        RTTESTI_CHECK_MSG(ucLower2 == ucLower, ("ucLower2=%#x ucLower=%#x\n", ucLower2, ucLower));
        RTUNICP const       ucUpper3  = RTUniCpToUpper(ucLower2);
        RTTESTI_CHECK_MSG(ucUpper3 == ucUpper, ("ucUpper3=%#x ucUpper=%#x\n", ucUpper3, ucUpper));

        pszDstEnd = RTStrPutCp(szDst, ucLower2);
        size_t const        cchLower2 = pszDstEnd - &szDst[0];
        RTTESTI_CHECK_MSG(cchDst == cchLower2,
                          ("ucLower2=%#x %u bytes;  ucUpper=%#x %u bytes; ucLower=%#x\n",
                           ucLower2, cchLower2, ucUpper, cchDst, ucLower));
    }
    RTTESTI_CHECK(strlen(strUpper2.c_str()) == strUpper2.length());
    RTTESTI_CHECK_MSG(cch == strUpper2.length(), ("cch=%u length()=%u\n", cch, strUpper2.length()));

    /* the toUpper method shall do the same thing. */
    strTmp = strLower;      CHECK_EQUAL(strTmp, strLower);
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper2);

    /* Ditto for the upper case string. */
    cch    = 0;
    pszCur = strUpper.c_str();
    RTCString    strLower2("");
    strLower2.reserve(strUpper.length() + 64);
    for (;;)
    {
        RTUNICP             ucUpper;
        const char * const  pszPrev   = pszCur;
        RTTESTI_CHECK_RC_BREAK(RTStrGetCpEx(&pszCur, &ucUpper), VINF_SUCCESS);
        size_t const        cchSrc    = pszCur - pszPrev;
        if (!ucUpper)
            break;

        RTUNICP const       ucLower   = RTUniCpToLower(ucUpper);
        const char         *pszDstEnd = RTStrPutCp(szDst, ucLower);
        size_t const        cchDst    = pszDstEnd - &szDst[0];
        RTTESTI_CHECK_MSG(cchSrc >= cchDst,
                          ("ucUpper=%#x %u bytes;  ucLower=%#x %u bytes\n",
                           ucUpper, cchSrc, ucLower, cchDst));

        cch += cchDst;
        strLower2.appendCodePoint(ucLower);

        /* roundtrip stability */
        RTUNICP const       ucLower2  = RTUniCpToLower(ucLower);
        RTTESTI_CHECK_MSG(ucLower2 == ucLower, ("ucLower2=%#x ucLower=%#x\n", ucLower2, ucLower));

        RTUNICP const       ucUpper2  = RTUniCpToUpper(ucLower);
        RTTESTI_CHECK_MSG(ucUpper2 == ucUpper, ("ucUpper2=%#x ucUpper=%#x\n", ucUpper2, ucUpper));
        RTUNICP const       ucLower3  = RTUniCpToLower(ucUpper2);
        RTTESTI_CHECK_MSG(ucLower3 == ucLower, ("ucLower3=%#x ucLower=%#x\n", ucLower3, ucLower));

        pszDstEnd = RTStrPutCp(szDst, ucUpper2);
        size_t const        cchUpper2 = pszDstEnd - &szDst[0];
        RTTESTI_CHECK_MSG(cchDst == cchUpper2,
                          ("ucUpper2=%#x %u bytes;  ucLower=%#x %u bytes\n",
                           ucUpper2, cchUpper2, ucLower, cchDst));
    }
    RTTESTI_CHECK(strlen(strLower2.c_str()) == strLower2.length());
    RTTESTI_CHECK_MSG(cch == strLower2.length(), ("cch=%u length()=%u\n", cch, strLower2.length()));

    strTmp = strUpper;      CHECK_EQUAL(strTmp, strUpper);
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower2);

    /* Checks of folding stability when nothing shall change. */
    strTmp = strUpper;      CHECK_EQUAL(strTmp, strUpper);
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper);
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper);
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper);

    strTmp = strUpper2;     CHECK_EQUAL(strTmp, strUpper2);
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper2);
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper2);
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper2);

    strTmp = strLower;      CHECK_EQUAL(strTmp, strLower);
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower);
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower);
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower);

    strTmp = strLower2;     CHECK_EQUAL(strTmp, strLower2);
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower2);
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower2);
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower2);

    /* Check folding stability for roundtrips. */
    strTmp = strUpper;      CHECK_EQUAL(strTmp, strUpper);
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower2);
    strTmp.toUpper();
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower2);
    strTmp.toUpper();
    strTmp.toLower();       CHECK_EQUAL(strTmp, strLower2);

    strTmp = strLower;      CHECK_EQUAL(strTmp, strLower);
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper2);
    strTmp.toLower();
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper2);
    strTmp.toLower();
    strTmp.toUpper();       CHECK_EQUAL(strTmp, strUpper2);
}
Esempio n. 4
0
/**
 * Splits a command line string into a NULL terminated argument vector.
 *
 * Arguments are delimited by any character in @a pszSeparators.  Single and
 * double quotes group text (separators included) into one argument; the quote
 * characters themselves are dropped.  No backslash escaping is performed.
 *
 * All argument strings are stored back to back in one heap buffer whose start
 * is the first argument, and the vector itself is a second heap block.  When
 * the command line holds no arguments the string buffer is released and only
 * a one-entry vector (just the NULL terminator) is returned.
 *
 * @returns VINF_SUCCESS on success, VERR_INVALID_PARAMETER for an empty
 *          separator set, VERR_NO_STR_MEMORY / VERR_NO_MEMORY on allocation
 *          failure, or whatever failure status the code point decoding or
 *          delimiter skipping reports.
 * @param   ppapszArgv      Where to return the argument vector.
 * @param   pcArgs          Where to return the number of arguments.
 * @param   pszCmdLine      The command line to split.
 * @param   pszSeparators   Separator characters, NULL selects " \t\n\r".
 */
RTDECL(int) RTGetOptArgvFromString(char ***ppapszArgv, int *pcArgs, const char *pszCmdLine, const char *pszSeparators)
{
    /*
     * Validate the input and settle on a separator set.
     */
    AssertPtr(pszCmdLine);
    AssertPtr(pcArgs);
    AssertPtr(ppapszArgv);
    if (pszSeparators)
    {
        AssertPtr(pszSeparators);
    }
    else
        pszSeparators = " \t\n\r";
    size_t const cchSeparators = strlen(pszSeparators);
    AssertReturn(cchSeparators > 0, VERR_INVALID_PARAMETER);

    /*
     * The unquoted arguments can never be longer than the input, so a buffer
     * of the same size holds all of them including their terminators.
     */
    char *pszBuf = (char *)RTMemAlloc(strlen(pszCmdLine) + 1);
    if (!pszBuf)
        return VERR_NO_STR_MEMORY;

    int         rc       = VINF_SUCCESS;
    const char *pszCur   = pszCmdLine;
    char       *pszOut   = pszBuf;
    char      **papszVec = NULL;
    unsigned    cArgs    = 0;
    while (*pszCur)
    {
        /* Get past any separators in front of the next argument. */
        rc = rtGetOptSkipDelimiters(&pszCur, pszSeparators, cchSeparators);
        if (RT_FAILURE(rc))
            break;
        if (!*pszCur)
            break;

        /* The vector grows in chunks of 32 entries, always leaving one extra
           slot for the terminating NULL. */
        if (cArgs % 32 == 0)
        {
            char **papszGrown = (char **)RTMemRealloc(papszVec, (cArgs + 33) * sizeof(char *));
            if (!papszGrown)
            {
                rc = VERR_NO_MEMORY;
                break;
            }
            papszVec = papszGrown;
        }
        papszVec[cArgs++] = pszOut;

        /* Copy the argument one code point at a time, tracking quoting. */
        RTUNICP ucQuote = 0;
        RTUNICP uc;
        for (;;)
        {
            rc = RTStrGetCpEx(&pszCur, &uc);
            if (RT_FAILURE(rc) || !uc)
                break;

            if (ucQuote)
            {
                /* Quoted text: only the matching quote is special. */
                if (uc == ucQuote)
                    ucQuote = 0;
                else
                    pszOut = RTStrPutCp(pszOut, uc);
                continue;
            }

            if (uc == '"' || uc == '\'')
            {
                ucQuote = uc;
                continue;
            }

            /* An unquoted separator ends the argument. */
            if (rtGetOptIsCpInSet(uc, pszSeparators, cchSeparators))
                break;

            pszOut = RTStrPutCp(pszOut, uc);
        }
        *pszOut++ = '\0';

        /* Stop on decoding errors and at the end of the command line. */
        if (RT_FAILURE(rc) || !uc)
            break;
    }

    if (RT_SUCCESS(rc))
    {
        /*
         * Terminate the vector.  Without any arguments there is no vector
         * yet and the string buffer is of no use, so swap it for a minimal
         * vector.
         */
        if (cArgs == 0)
        {
            RTMemFree(pszBuf);
            papszVec = (char **)RTMemAlloc(1 * sizeof(char *));
            if (!papszVec)
                return VERR_NO_MEMORY;
        }
        papszVec[cArgs] = NULL;

        *pcArgs     = cArgs;
        *ppapszArgv = papszVec;
        return VINF_SUCCESS;
    }

    /* Failure: drop everything that was allocated. */
    RTMemFree(pszBuf);
    RTMemFree(papszVec);
    return rc;
}
Esempio n. 5
0
/**
 * Splits a command line string into a NULL terminated argument vector, using
 * either Bourne shell or Microsoft CRT quoting rules.
 *
 * Arguments are delimited by any character in @a pszSeparators.  All argument
 * strings are written back to back into one heap buffer (the first argument
 * pointer is the start of that buffer); the vector is a separate heap block.
 * If the command line contains no arguments, the string buffer is freed and a
 * one-entry vector holding only the NULL terminator is returned.
 *
 * Quoting rules as implemented below:
 *  - Bourne shell: single and double quotes group text; outside quotes and
 *    inside double quotes a backslash makes the following code point literal;
 *    inside single quotes a backslash is an ordinary character.
 *  - MS CRT: only double quotes group text; a run of backslashes is literal
 *    unless it is immediately followed by a double quote, in which case half
 *    of the backslashes are output and an odd count makes the quote literal
 *    while an even count lets it toggle the quoting state.
 *
 * @returns VINF_SUCCESS on success, VERR_INVALID_FLAGS if @a fFlags is not
 *          exactly one of the two quoting styles, VERR_INVALID_PARAMETER for
 *          an empty separator set, VERR_NO_STR_MEMORY / VERR_NO_MEMORY on
 *          allocation failure, or the failure status returned by
 *          rtGetOptSkipDelimiters / RTStrGetCpEx.
 * @param   ppapszArgv      Where to return the argument vector.
 * @param   pcArgs          Where to return the number of arguments.
 * @param   pszCmdLine      The command line to split.
 * @param   fFlags          RTGETOPTARGV_CNV_QUOTE_BOURNE_SH or
 *                          RTGETOPTARGV_CNV_QUOTE_MS_CRT.
 * @param   pszSeparators   Separator characters, NULL selects " \t\n\r".
 */
RTDECL(int) RTGetOptArgvFromString(char ***ppapszArgv, int *pcArgs, const char *pszCmdLine,
                                   uint32_t fFlags, const char *pszSeparators)
{
    /*
     * Some input validation.
     */
    AssertPtr(pszCmdLine);
    AssertPtr(pcArgs);
    AssertPtr(ppapszArgv);
    AssertReturn(   fFlags == RTGETOPTARGV_CNV_QUOTE_BOURNE_SH
                 || fFlags == RTGETOPTARGV_CNV_QUOTE_MS_CRT, VERR_INVALID_FLAGS);
    if (!pszSeparators)
        pszSeparators = " \t\n\r";
    else
        AssertPtr(pszSeparators);
    size_t const cchSeparators = strlen(pszSeparators);
    AssertReturn(cchSeparators > 0, VERR_INVALID_PARAMETER);

    /*
     * Parse the command line and chop off it into argv individual argv strings.
     */
    int         rc        = VINF_SUCCESS;
    const char *pszSrc    = pszCmdLine;
    /* Output buffer for all argument strings; sized like the input. */
    char       *pszDup    = (char *)RTMemAlloc(strlen(pszSrc) + 1);
    char       *pszDst    = pszDup;
    if (!pszDup)
        return VERR_NO_STR_MEMORY;
    char      **papszArgs = NULL;
    unsigned    iArg      = 0;
    while (*pszSrc)
    {
        /* Skip stuff */
        rc = rtGetOptSkipDelimiters(&pszSrc, pszSeparators, cchSeparators);
        if (RT_FAILURE(rc))
            break;
        if (!*pszSrc)
            break;

        /* Start a new entry. */
        /* The vector is grown 32 entries at a time; the 33rd slot keeps room
           for the terminating NULL entry. */
        if ((iArg % 32) == 0)
        {
            void *pvNew = RTMemRealloc(papszArgs, (iArg + 33) * sizeof(char *));
            if (!pvNew)
            {
                rc = VERR_NO_MEMORY;
                break;
            }
            papszArgs = (char **)pvNew;
        }
        papszArgs[iArg++] = pszDst;

        /*
         * Parse and copy the string over.
         */
        RTUNICP Cp;
        if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_BOURNE_SH)
        {
            /*
             * Bourne shell style.
             */
            RTUNICP CpQuote = 0; /* The active quote character, 0 when unquoted. */
            for (;;)
            {
                rc = RTStrGetCpEx(&pszSrc, &Cp);
                if (RT_FAILURE(rc) || !Cp)
                    break;
                if (!CpQuote)
                {
                    if (Cp == '"' || Cp == '\'')
                        CpQuote = Cp;
                    else if (rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
                        break; /* Unquoted separator: end of this argument. */
                    else if (Cp != '\\')
                        pszDst = RTStrPutCp(pszDst, Cp);
                    else
                    {
                        /* escaped char */
                        rc = RTStrGetCpEx(&pszSrc, &Cp);
                        if (RT_FAILURE(rc) || !Cp)
                            break;
                        pszDst = RTStrPutCp(pszDst, Cp);
                    }
                }
                else if (CpQuote != Cp)
                {
                    /* Inside single quotes the backslash is copied literally;
                       inside double quotes it escapes the next code point. */
                    if (Cp != '\\' || CpQuote == '\'')
                        pszDst = RTStrPutCp(pszDst, Cp);
                    else
                    {
                        /* escaped char */
                        rc = RTStrGetCpEx(&pszSrc, &Cp);
                        if (RT_FAILURE(rc) || !Cp)
                            break;
                        pszDst = RTStrPutCp(pszDst, Cp);
                    }
                }
                else
                    CpQuote = 0; /* Matching closing quote. */
            }
        }
        else
        {
            /*
             * Microsoft CRT style.
             */
            Assert((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_MS_CRT);
            bool fInQuote = false;
            for (;;)
            {
                rc = RTStrGetCpEx(&pszSrc, &Cp);
                if (RT_FAILURE(rc) || !Cp)
                    break;
                if (Cp == '"')
                    fInQuote = !fInQuote;
                else if (!fInQuote && rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
                    break;
                else if (Cp != '\\')
                    pszDst = RTStrPutCp(pszDst, Cp);
                else
                {
                    /* A backslash sequence is only relevant if followed by
                       a double quote, then it will work like an escape char. */
                    /* Note: despite its name, cQuotes counts the backslashes in
                       the run (the one just read plus any that follow). */
                    size_t cQuotes = 1;
                    while (*pszSrc == '\\')
                    {
                        cQuotes++;
                        pszSrc++;
                    }
                    if (*pszSrc != '"')
                        /* Not an escape sequence.  */
                        while (cQuotes-- > 0)
                            pszDst = RTStrPutCp(pszDst, '\\');
                    else
                    {
                        /* Escape sequence.  Output half of the slashes.  If odd
                           number, output the escaped double quote . */
                        while (cQuotes >= 2)
                        {
                            pszDst = RTStrPutCp(pszDst, '\\');
                            cQuotes -= 2;
                        }
                        /* Even count: the quote is a real quote and toggles the
                           state.  Odd count: the quote is copied literally. */
                        if (!cQuotes)
                            fInQuote = !fInQuote;
                        else
                            pszDst = RTStrPutCp(pszDst, '"');
                        pszSrc++; /* Consume the double quote itself. */
                    }
                }
            }
        }

        /* Terminate the argument; stop on error or at the end of the input. */
        *pszDst++ = '\0';
        if (RT_FAILURE(rc) || !Cp)
            break;
    }

    if (RT_FAILURE(rc))
    {
        RTMemFree(pszDup);
        RTMemFree(papszArgs);
        return rc;
    }

    /*
     * Terminate the array.
     * Check for empty string to make sure we've got an array.
     */
    if (iArg == 0)
    {
        /* No arguments: the string buffer is unused, return a vector that
           only holds the NULL terminator. */
        RTMemFree(pszDup);
        papszArgs = (char **)RTMemAlloc(1 * sizeof(char *));
        if (!papszArgs)
            return VERR_NO_MEMORY;
    }
    papszArgs[iArg] = NULL;

    *pcArgs     = iArg;
    *ppapszArgv = papszArgs;
    return VINF_SUCCESS;
}