Beispiel #1
0
void plString::IConvertFromUtf16(const uint16_t *utf16, size_t size)
{
    hsAssert(size == STRLEN_AUTO || size < FREAKING_BIG, "Your string is WAAAAAY too big");

    fUtf8Buffer = plStringBuffer<char>();
    if (!utf16)
        return;

    if (size == STRLEN_AUTO)
        size = u16slen(utf16);

    // Calculate the UTF-8 size
    size_t convlen = 0;
    const uint16_t *sp = utf16;
    while (sp < utf16 + size) {
        if (*sp >= 0xD800 && *sp <= 0xDFFF) {
            // Surrogate pair
            convlen += 4;
            ++sp;
        }
        else if (*sp > 0x7FF)
            convlen += 3;
        else if (*sp > 0x7F)
            convlen += 2;
        else
            convlen += 1;
        ++sp;
    }

    // And perform the actual conversion
    char *utf8 = fUtf8Buffer.CreateWritableBuffer(convlen);
    char *dp = utf8;
    sp = utf16;
    while (sp < utf16 + size) {
        if (*sp >= 0xD800 && *sp <= 0xDFFF) {
            // Surrogate pair
            UniChar unichar = 0x10000;

            if (sp + 1 >= utf16 + size) {
                hsAssert(0, "Incomplete surrogate pair in UTF-16 data");
                unichar = BADCHAR_REPLACEMENT;
            } else if (*sp < 0xDC00) {
                unichar += (*sp++ & 0x3FF) << 10;
                hsAssert(*sp >= 0xDC00 && *sp <= 0xDFFF,
                         "Invalid surrogate pair in UTF-16 data");
                unichar += (*sp   & 0x3FF);
            } else {
                unichar += (*sp++ & 0x3FF);
                hsAssert(*sp >= 0xD800 && *sp <  0xDC00,
                         "Invalid surrogate pair in UTF-16 data");
                unichar += (*sp   & 0x3FF) << 10;
            }
            *dp++ = 0xF0 | ((unichar >> 18) & 0x07);
            *dp++ = 0x80 | ((unichar >> 12) & 0x3F);
            *dp++ = 0x80 | ((unichar >>  6) & 0x3F);
            *dp++ = 0x80 | ((unichar      ) & 0x3F);
        } else if (*sp > 0x7FF) {
void plString::IConvertFromUtf16(const uint16_t *utf16, size_t size)
{
    if (utf16 == nil) {
        fUtf8Buffer = plStringBuffer<char>();
        return;
    }

    if ((int32_t)size < 0)
        size = u16slen(utf16, -(int32_t)size);

    // Calculate the UTF-8 size
    size_t convlen = 0;
    const uint16_t *sp = utf16;
    while (sp < utf16 + size) {
        if (*sp >= 0xD800 && *sp <= 0xDFFF) {
            // Surrogate pair
            convlen += 4;
            ++sp;
        }
        else if (*sp > 0x7FF)
            convlen += 3;
        else if (*sp > 0x7F)
            convlen += 2;
        else
            convlen += 1;
        ++sp;
    }

    // And perform the actual conversion
    char *utf8 = new char[convlen + 1];
    char *dp = utf8;
    sp = utf16;
    while (sp < utf16 + size) {
        if (*sp >= 0xD800 && *sp <= 0xDFFF) {
            // Surrogate pair
            unsigned int unichar = 0x10000;

            if (sp + 1 >= utf16 + size) {
                hsAssert(0, "Incomplete surrogate pair in UTF-16 data");
                unichar = BADCHAR_REPLACEMENT;
            } else if (*sp < 0xDC00) {
                unichar += (*sp++ & 0x3FF) << 10;
                hsAssert(*sp >= 0xDC00 && *sp <= 0xDFFF,
                         "Invalid surrogate pair in UTF-16 data");
                unichar += (*sp   & 0x3FF);
            } else {
                unichar += (*sp++ & 0x3FF);
                hsAssert(*sp >= 0xD800 && *sp <  0xDC00,
                         "Invalid surrogate pair in UTF-16 data");
                unichar += (*sp   & 0x3FF) << 10;
            }
            *dp++ = 0xF0 | ((unichar >> 18) & 0x07);
            *dp++ = 0x80 | ((unichar >> 12) & 0x3F);
            *dp++ = 0x80 | ((unichar >>  6) & 0x3F);
            *dp++ = 0x80 | ((unichar      ) & 0x3F);
        } else if (*sp > 0x7FF) {