Bool UnicodeSanityCheck(const void *buffer, // IN ssize_t lengthInBytes, // IN StringEncoding encoding) // IN { ASSERT(Unicode_IsEncodingValid(encoding)); /* * Sanity check US-ASCII here, so we can fast-path its conversion * to Unicode later. */ if (encoding == STRING_ENCODING_US_ASCII) { const uint8 *asciiBytes = (const uint8 *) buffer; if (lengthInBytes == -1) { for (; *asciiBytes != '\0'; asciiBytes++) { if (*asciiBytes >= 0x80) { return FALSE; } } } else { ssize_t i; for (i = 0; i < lengthInBytes; i++) { if (asciiBytes[i] >= 0x80) { return FALSE; } } } } return TRUE; }
Unicode UnicodeAllocInternal(const void *buffer, // IN ssize_t lengthInBytes, // IN StringEncoding encoding, // IN Bool strict) // IN { char *utf8Result = NULL; ASSERT(buffer != NULL); ASSERT(lengthInBytes >= 0); ASSERT(Unicode_IsEncodingValid(encoding)); if (!strict) { CodeSet_GenericToGeneric(Unicode_EncodingEnumToName(encoding), buffer, lengthInBytes, "UTF-8", CSGTG_TRANSLIT, &utf8Result, NULL); return utf8Result; } switch (encoding) { case STRING_ENCODING_US_ASCII: case STRING_ENCODING_UTF8: if (Unicode_IsBufferValid(buffer, lengthInBytes, encoding)) { utf8Result = Util_SafeStrndup(buffer, lengthInBytes); } break; case STRING_ENCODING_UTF16_LE: // utf8Result will be left NULL on failure. CodeSet_Utf16leToUtf8((const char *)buffer, lengthInBytes, &utf8Result, NULL); break; default: CodeSet_GenericToGeneric(Unicode_EncodingEnumToName(encoding), buffer, lengthInBytes, "UTF-8", 0, &utf8Result, NULL); break; } return (Unicode)utf8Result; }