// returns true if we performed an UTF-8 decoding static bool encodedUtf8ToUtf16(QString &result, ushort *&output, const ushort *begin, const ushort *&input, const ushort *end, ushort decoded) { int charsNeeded; uint min_uc; uint uc; if (decoded <= 0xC1) { // an UTF-8 first character must be at least 0xC0 // however, all 0xC0 and 0xC1 first bytes can only produce overlong sequences return false; } else if (decoded < 0xe0) { charsNeeded = 2; min_uc = 0x80; uc = decoded & 0x1f; } else if (decoded < 0xf0) { charsNeeded = 3; min_uc = 0x800; uc = decoded & 0x0f; } else if (decoded < 0xf5) { charsNeeded = 4; min_uc = 0x10000; uc = decoded & 0x07; } else { // the last Unicode character is U+10FFFF // it's encoded in UTF-8 as "\xF4\x8F\xBF\xBF" // therefore, a byte higher than 0xF4 is not the UTF-8 first byte return false; } // are there enough remaining? if (end - input < 3*charsNeeded) return false; if (input[3] != '%') return false; // first continuation character decoded = decodePercentEncoding(input + 3); if ((decoded & 0xc0) != 0x80) return false; uc <<= 6; uc |= decoded & 0x3f; if (charsNeeded > 2) { if (input[6] != '%') return false; // second continuation character decoded = decodePercentEncoding(input + 6); if ((decoded & 0xc0) != 0x80) return false; uc <<= 6; uc |= decoded & 0x3f; if (charsNeeded > 3) { if (input[9] != '%') return false; // third continuation character decoded = decodePercentEncoding(input + 9); if ((decoded & 0xc0) != 0x80) return false; uc <<= 6; uc |= decoded & 0x3f; } } // we've decoded something; safety-check it if (uc < min_uc) return false; if (isSurrogate(uc) || isNonCharacter(uc) || uc > 0x10ffff/*QChar::LastValidCodePoint*/) return false; if (!QChar::requiresSurrogates(uc)) { // UTF-8 decoded and no surrogates are required // detach if necessary ensureDetached(result, output, begin, input, end, -9 * charsNeeded + 1); *output++ = uc; } else { // UTF-8 decoded to something that requires a surrogate pair ensureDetached(result, output, begin, input, end, -9 * charsNeeded + 2); *output++ = QChar::highSurrogate(uc); *output++ = QChar::lowSurrogate(uc); } input += charsNeeded * 3 - 1; return true; }
// Returns true if the code point is an Unicode character (that is, // an Unicode scalar value excluding noncharacters). inline constexpr bool isUnicodeCharacter(code_point_t code_point) { return isUnicodeScalarValue(code_point) and not isNonCharacter(code_point); }