Beispiel #1
0
    // Convert UTF8 FastString to a wide char string
    ReadOnlyUnicodeString convert(const FastString & string)
    {
        // Create some space for the UCS-2 string
        ReadOnlyUnicodeString ret(0, string.getLength() + 1);
        ReadOnlyUnicodeString::CharType * data = const_cast<ReadOnlyUnicodeString::CharType *>(ret.getData());
        if (!data) return ret;

        ReadOnlyUnicodeString::CharType * dest = data;

        // Direct access to the buffer (don't bound check for every access)
        const uint8 * buffer = (const uint8*)string;
        for (int i = 0; i < string.getLength();)
        {
            // Get the current char
            const uint8 c = buffer [i++];

            // High bit set: this byte is not plain ASCII, so decode a multi-byte UTF-8 sequence
            if ((c & 0x80) != 0)
            {
                // The data is in the 7 low bits
                uint32 dataMask = 0x7f;
                // The count bit mask
                uint32 bitCountMask = 0x40;
                // Number of continuation bytes announced by the lead byte
                int charCount = 0;

                while ((c & bitCountMask) != 0 && bitCountMask)
                {
                    ++charCount;
                    dataMask >>= 1; bitCountMask >>= 1;
                }

                // Keep the lead byte's payload bits (its length prefix is masked off)
                int n = (c & dataMask);

                // Then extract the remaining bits
                while (--charCount >= 0 && i < string.getLength())
                {
                    const uint8 extra = buffer[i];
                    // Make sure it's a valid UTF8 encoding
                    if ((extra & 0xc0) != 0x80) break;

                    // Store the new bits too
                    n <<= 6; n |= (extra & 0x3f);
                    ++i;
                }

                *dest++ = (ReadOnlyUnicodeString::CharType)n;
            }
            else // Append the char as-is