C++ (Cpp) UTF8Chars示例

编程语言: C++ (Cpp)

类/类型: UTF8Chars

hotexamples.com的示例: 2

C++ (Cpp) UTF8Chars - 共找到 2 个示例。以下是从开源项目中提取的、评价最高的 UTF8Chars 真实 C++ (Cpp) 代码示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示隐藏

length(2)

常用方法

length (2)

示例#1

显示文件

文件： CharacterEncoding.cpp 项目： brendanlong/gecko-dev

// Decodes the UTF-8 bytes in |src|, tracking how many jschars the inflated
// string needs and, when |action == Copy|, writing those jschars into |dst|.
//
// NOTE(review): |action| and its values (Copy, CountAndReportInvalids,
// CountAndIgnoreInvalids) are not declared in this excerpt -- presumably a
// template parameter declared just above the function; confirm against the
// full file.
//
// |*isAsciip| is set to true on entry and cleared as soon as a byte with the
// high bit set is seen.  On a malformed sequence, CountAndReportInvalids
// calls the given Report* function and returns false; the other actions
// account for one output unit (REPLACE_UTF8 is stored when copying) and keep
// scanning.
//
// NOTE(review): this excerpt stops right after the loop, so the use of
// |dstlenp| and the final return statement are not visible here.
static bool
InflateUTF8StringToBuffer(JSContext *cx, const UTF8Chars src, jschar *dst, size_t *dstlenp,
                          bool *isAsciip)
{
    *isAsciip = true;

    // First, count how many jschars need to be in the inflated string.
    // |i| is the index into |src|, and |j| is the index into |dst|.
    size_t srclen = src.length();
    uint32_t j = 0;
    for (uint32_t i = 0; i < srclen; i++, j++) {
        uint32_t v = uint32_t(src[i]);
        if (!(v & 0x80)) {
            // ASCII code unit.  Simple copy.
            if (action == Copy)
                dst[j] = jschar(v);

        } else {
            // Non-ASCII code unit.  Determine its length in bytes (n).
            // Bit 0x80 is known to be set, so n starts at 1 and grows by one
            // for every further consecutive set bit below it in the lead byte.
            *isAsciip = false;
            uint32_t n = 1;
            while (v & (0x80 >> n))
                n++;

            // INVALID(report, arg, n2) handles a malformed sequence.  For
            // CountAndReportInvalids it calls report(cx, arg) and returns
            // false.  Otherwise it stores REPLACE_UTF8 when copying, sets
            // n = n2 so that the code at invalidMultiByteCodeUnit (plus the
            // loop's own i++) advances |i| by n2 bytes in total, and jumps to
            // that label.
        #define INVALID(report, arg, n2)                                \
            do {                                                        \
                if (action == CountAndReportInvalids) {                 \
                    report(cx, arg);                                    \
                    return false;                                       \
                } else {                                                \
                    if (action == Copy)                                 \
                        dst[j] = jschar(REPLACE_UTF8);                  \
                    else                                                \
                        JS_ASSERT(action == CountAndIgnoreInvalids);    \
                    n = n2;                                             \
                    goto invalidMultiByteCodeUnit;                      \
                }                                                       \
            } while (0)

            // Check the leading byte.  n == 1 means the byte has the form
            // 10xxxxxx (a continuation byte in lead position); n > 4 means
            // more than four leading one bits.
            if (n < 2 || n > 4)
                INVALID(ReportInvalidCharacter, i, 1);

            // Check that |src| is large enough to hold an n-byte code unit.
            // This must precede the src[i + 1] .. src[i + n - 1] reads below.
            if (i + n > srclen)
                INVALID(ReportBufferTooSmall, /* dummy = */ 0, 1);

            // Check the second byte.  From Unicode Standard v6.2, Table 3-7
            // Well-Formed UTF-8 Byte Sequences.
            // Each clause rejects a second byte outside the range named in
            // its trailing comment for that particular lead byte.
            if ((v == 0xE0 && ((uint8_t)src[i + 1] & 0xE0) != 0xA0) ||  // E0 A0~BF
                (v == 0xED && ((uint8_t)src[i + 1] & 0xE0) != 0x80) ||  // ED 80~9F
                (v == 0xF0 && ((uint8_t)src[i + 1] & 0xF0) == 0x80) ||  // F0 90~BF
                (v == 0xF4 && ((uint8_t)src[i + 1] & 0xF0) != 0x80))    // F4 80~8F
            {
                INVALID(ReportInvalidCharacter, i, 1);
            }

            // Check the continuation bytes.
            // Every byte after the lead must look like 10xxxxxx.  Passing m
            // as n2 skips only the m bytes before the offending one, so the
            // offending byte itself is examined again as a potential lead.
            for (uint32_t m = 1; m < n; m++)
                if ((src[i + m] & 0xC0) != 0x80)
                    INVALID(ReportInvalidCharacter, i, m);

            // Determine the code unit's length in jschars and act accordingly.
            v = Utf8ToOneUcs4Char((uint8_t *)&src[i], n);
            if (v < 0x10000) {
                // The n-byte UTF8 code unit will fit in a single jschar.
                if (action == Copy)
                    dst[j] = jschar(v);

            } else {
                // After subtracting 0x10000, at most 20 bits (0xFFFFF) can be
                // split into a 0xD800-based and a 0xDC00-based jschar.
                v -= 0x10000;
                if (v <= 0xFFFFF) {
                    // The n-byte UTF8 code unit will fit in two jschars.
                    if (action == Copy)
                        dst[j] = jschar((v >> 10) + 0xD800);
                    // Extra increment: the loop header only advances |j| once
                    // per decoded code unit, but two jschars are produced.
                    j++;
                    if (action == Copy)
                        dst[j] = jschar((v & 0x3FF) + 0xDC00);

                } else {
                    // The n-byte UTF8 code unit won't fit in two jschars.
                    INVALID(ReportTooBigCharacter, v, 1);
                }
            }

          invalidMultiByteCodeUnit:
            // Move i to the last byte of the multi-byte code unit;  the loop
            // header will do the final i++ to move to the start of the next
            // code unit.
            // This is reached both by falling through after a valid sequence
            // and by the goto inside INVALID (where n has been reset to n2).
            i += n - 1;
        }
    }

示例#2

显示文件

文件： CharacterEncoding.cpp 项目： MichaelKohler/gecko-dev

// Decodes the UTF-8 bytes in |src|, counting the CharT code units of the
// inflated string and, when |Action == Copy|, writing them into |dst|.
//
// NOTE(review): |Action|, |CharT| and |ContextT| are not declared in this
// excerpt -- presumably template parameters declared just above the function;
// confirm against the full file.
//
// Unless Action is AssertNoInvalids, |*smallestEncoding| starts as ASCII and
// is updated through the RequireLatin1/RequireUTF16 helpers below while
// multi-byte sequences are processed.  With FindEncoding the function returns
// true as soon as a code point above 0xff has been decoded.
//
// Malformed input: CountAndReportInvalids calls the given Report* function
// and returns false; AssertNoInvalids hits MOZ_CRASH; the remaining actions
// store a replacement character when copying and keep scanning.
//
// NOTE(review): this excerpt stops right after the loop, so the use of
// |dstlenp| and the final return statement are not visible here.
static bool
InflateUTF8StringToBuffer(ContextT* cx, const UTF8Chars src, CharT* dst, size_t* dstlenp,
                          JS::SmallestEncoding *smallestEncoding)
{
    if (Action != AssertNoInvalids)
        *smallestEncoding = JS::SmallestEncoding::ASCII;
    // Stores the larger of Latin1 and the current value, so a value that
    // already compares greater than Latin1 is left unchanged.
    auto RequireLatin1 = [&smallestEncoding]{
        *smallestEncoding = std::max(JS::SmallestEncoding::Latin1, *smallestEncoding);
    };
    // Unconditionally records UTF16 as the required encoding.
    auto RequireUTF16 = [&smallestEncoding]{
        *smallestEncoding = JS::SmallestEncoding::UTF16;
    };

    // Count how many code units need to be in the inflated string.
    // |i| is the index into |src|, and |j| is the index into |dst|.
    size_t srclen = src.length();
    uint32_t j = 0;
    for (uint32_t i = 0; i < srclen; i++, j++) {
        uint32_t v = uint32_t(src[i]);
        if (!(v & 0x80)) {
            // ASCII code unit.  Simple copy.
            if (Action == Copy)
                dst[j] = CharT(v);

        } else {
            // Non-ASCII code unit.  Determine its length in bytes (n).
            // Bit 0x80 is known to be set, so n starts at 1 and grows by one
            // for every further consecutive set bit below it in the lead byte.
            uint32_t n = 1;
            while (v & (0x80 >> n))
                n++;

            // INVALID(report, arg, n2) handles a malformed sequence.  For
            // CountAndReportInvalids it calls report(cx, arg) and returns
            // false; for AssertNoInvalids it crashes with the stringified
            // report name in the message.  Otherwise it stores a replacement
            // character when copying, sets n = n2 so that the code at
            // invalidMultiByteCodeUnit (plus the loop's own i++) advances |i|
            // by n2 bytes in total, and jumps to that label.
            //
            // NOTE(review): decltype(dst[0]) names a reference type because
            // dst[0] is an lvalue expression, so the is_same comparison with
            // Latin1Char looks like it can never be true and the
            // REPLACE_UTF8_LATIN1 branch may be dead -- confirm.
        #define INVALID(report, arg, n2)                                \
            do {                                                        \
                if (Action == CountAndReportInvalids) {                 \
                    report(cx, arg);                                    \
                    return false;                                       \
                } else if (Action == AssertNoInvalids) {                \
                    MOZ_CRASH("invalid UTF-8 string: " # report);       \
                } else {                                                \
                    if (Action == Copy) {                               \
                        if (std::is_same<decltype(dst[0]), Latin1Char>::value) \
                            dst[j] = CharT(REPLACE_UTF8_LATIN1);        \
                        else                                            \
                            dst[j] = CharT(REPLACE_UTF8);               \
                    } else {                                            \
                        MOZ_ASSERT(Action == CountAndIgnoreInvalids ||  \
                                   Action == FindEncoding);             \
                    }                                                   \
                    n = n2;                                             \
                    goto invalidMultiByteCodeUnit;                      \
                }                                                       \
            } while (0)

            // Check the leading byte.  n == 1 means the byte has the form
            // 10xxxxxx (a continuation byte in lead position); n > 4 means
            // more than four leading one bits.
            if (n < 2 || n > 4)
                INVALID(ReportInvalidCharacter, i, 1);

            // Check that |src| is large enough to hold an n-byte code unit.
            // This must precede the src[i + 1] .. src[i + n - 1] reads below.
            if (i + n > srclen)
                INVALID(ReportBufferTooSmall, /* dummy = */ 0, 1);

            // Check the second byte.  From Unicode Standard v6.2, Table 3-7
            // Well-Formed UTF-8 Byte Sequences.
            // Each clause rejects a second byte outside the range named in
            // its trailing comment for that particular lead byte.
            if ((v == 0xE0 && ((uint8_t)src[i + 1] & 0xE0) != 0xA0) ||  // E0 A0~BF
                (v == 0xED && ((uint8_t)src[i + 1] & 0xE0) != 0x80) ||  // ED 80~9F
                (v == 0xF0 && ((uint8_t)src[i + 1] & 0xF0) == 0x80) ||  // F0 90~BF
                (v == 0xF4 && ((uint8_t)src[i + 1] & 0xF0) != 0x80))    // F4 80~8F
            {
                INVALID(ReportInvalidCharacter, i, 1);
            }

            // Check the continuation bytes.
            // Every byte after the lead must look like 10xxxxxx.  Passing m
            // as n2 skips only the m bytes before the offending one, so the
            // offending byte itself is examined again as a potential lead.
            for (uint32_t m = 1; m < n; m++) {
                if ((src[i + m] & 0xC0) != 0x80)
                    INVALID(ReportInvalidCharacter, i, m);
            }

            // Determine the code unit's length in CharT and act accordingly.
            v = JS::Utf8ToOneUcs4Char((uint8_t*)&src[i], n);
            if (Action != AssertNoInvalids) {
                // Code points above 0xff require UTF16; FindEncoding can stop
                // here because no later input can change that answer.
                if (v > 0xff) {
                    RequireUTF16();
                    if (Action == FindEncoding) {
                        MOZ_ASSERT(dst == nullptr);
                        return true;
                    }
                } else {
                    RequireLatin1();
                }
            }
            if (v < 0x10000) {
                // The n-byte UTF8 code unit will fit in a single CharT.
                if (Action == Copy)
                    dst[j] = CharT(v);
            } else {
                // After subtracting 0x10000, at most 20 bits (0xFFFFF) can be
                // split into a 0xD800-based and a 0xDC00-based unit.
                v -= 0x10000;
                if (v <= 0xFFFFF) {
                    // The n-byte UTF8 code unit will fit in two CharT units.
                    if (Action == Copy)
                        dst[j] = CharT((v >> 10) + 0xD800);
                    // Extra increment: the loop header only advances |j| once
                    // per decoded code unit, but two units are produced.
                    j++;
                    if (Action == Copy)
                        dst[j] = CharT((v & 0x3FF) + 0xDC00);

                } else {
                    // The n-byte UTF8 code unit won't fit in two CharT units.
                    INVALID(ReportTooBigCharacter, v, 1);
                }
            }

          invalidMultiByteCodeUnit:
            // Move i to the last byte of the multi-byte code unit;  the loop
            // header will do the final i++ to move to the start of the next
            // code unit.
            i += n - 1;
            // NOTE(review): this runs for valid sequences too (the label is
            // reached by fall-through as well as by goto), so it overrides a
            // preceding RequireLatin1() result -- confirm this is intended.
            if (Action != AssertNoInvalids)
                RequireUTF16();
        }
    }