Unicode UnicodeAllocInternal(const void *buffer, // IN ssize_t lengthInBytes, // IN StringEncoding encoding, // IN Bool strict) // IN { char *utf8Result = NULL; ASSERT(buffer != NULL); ASSERT(lengthInBytes >= 0); ASSERT(Unicode_IsEncodingValid(encoding)); if (!strict) { CodeSet_GenericToGeneric(Unicode_EncodingEnumToName(encoding), buffer, lengthInBytes, "UTF-8", CSGTG_TRANSLIT, &utf8Result, NULL); return utf8Result; } switch (encoding) { case STRING_ENCODING_US_ASCII: case STRING_ENCODING_UTF8: if (Unicode_IsBufferValid(buffer, lengthInBytes, encoding)) { utf8Result = Util_SafeStrndup(buffer, lengthInBytes); } break; case STRING_ENCODING_UTF16_LE: // utf8Result will be left NULL on failure. CodeSet_Utf16leToUtf8((const char *)buffer, lengthInBytes, &utf8Result, NULL); break; default: CodeSet_GenericToGeneric(Unicode_EncodingEnumToName(encoding), buffer, lengthInBytes, "UTF-8", 0, &utf8Result, NULL); break; } return (Unicode)utf8Result; }
void * UnicodeGetAllocBytesInternal(ConstUnicode ustr, // IN StringEncoding encoding, // IN ssize_t lengthInBytes, // IN size_t *retLength) // OUT: optional { const char *utf8Str = ustr; char *result = NULL; ASSERT(ustr != NULL); encoding = Unicode_ResolveEncoding(encoding); if (lengthInBytes == -1) { lengthInBytes = Unicode_LengthInBytes(ustr, STRING_ENCODING_UTF8); } switch (encoding) { case STRING_ENCODING_US_ASCII: if (!UnicodeSanityCheck(utf8Str, lengthInBytes, encoding)) { break; } // fall through case STRING_ENCODING_UTF8: result = Util_SafeMalloc(lengthInBytes + 1); memcpy(result, utf8Str, lengthInBytes + 1); if (retLength != NULL) { *retLength = lengthInBytes; } break; case STRING_ENCODING_UTF16_LE: if (!CodeSet_Utf8ToUtf16le(utf8Str, lengthInBytes, &result, retLength)) { // input should be valid UTF-8, no conversion error possible ASSERT_MEM_ALLOC(FALSE); } break; default: if (!CodeSet_GenericToGeneric("UTF-8", utf8Str, lengthInBytes, Unicode_EncodingEnumToName(encoding), CSGTG_NORMAL, &result, retLength)) { // XXX can't distinguish error cause ASSERT(result == NULL); } } return result; }
Unicode Unicode_AllocWithLength(const void *buffer, // IN: ssize_t lengthInBytes, // IN: StringEncoding encoding) // IN: { Unicode result; ASSERT(lengthInBytes >= 0 || lengthInBytes == -1); if (buffer == NULL) { ASSERT(lengthInBytes <= 0); return NULL; } encoding = Unicode_ResolveEncoding(encoding); if (lengthInBytes == -1) { lengthInBytes = Unicode_LengthInBytes(buffer, encoding); } result = UnicodeAllocInternal(buffer, lengthInBytes, encoding, FALSE); if (result == NULL) { char *escapedBuffer = Unicode_EscapeBuffer(buffer, lengthInBytes, encoding); /* * Log and panic on failure. */ Log("%s: Couldn't convert invalid buffer [%s] from %s to Unicode.\n", __FUNCTION__, escapedBuffer ? escapedBuffer : "(couldn't escape bytes)", Unicode_EncodingEnumToName(encoding)); free(escapedBuffer); PANIC(); } return result; }
Bool Unicode_IsBufferValid(const void *buffer, // IN ssize_t lengthInBytes, // IN StringEncoding encoding) // IN { if (buffer == NULL) { ASSERT(lengthInBytes <= 0); return TRUE; } encoding = Unicode_ResolveEncoding(encoding); if (encoding == STRING_ENCODING_US_ASCII) { return UnicodeSanityCheck(buffer, lengthInBytes, encoding); } if (lengthInBytes == -1) { lengthInBytes = Unicode_LengthInBytes(buffer, encoding); } return CodeSet_Validate(buffer, lengthInBytes, Unicode_EncodingEnumToName(encoding)); }
Bool Unicode_CopyBytes(void *destBuffer, // OUT ConstUnicode srcBuffer, // IN size_t maxLengthInBytes, // IN size_t *retLength, // OUT StringEncoding encoding) // IN { const char *utf8Str = (const char *)srcBuffer; Bool success = FALSE; size_t copyBytes = 0; encoding = Unicode_ResolveEncoding(encoding); switch (encoding) { case STRING_ENCODING_US_ASCII: if (!UnicodeSanityCheck(utf8Str, -1, encoding)) { break; } // fall through case STRING_ENCODING_UTF8: { size_t len = strlen(utf8Str); copyBytes = MIN(len, maxLengthInBytes - 1); memcpy(destBuffer, utf8Str, copyBytes); /* * If we truncated, force a null termination in a UTF-8 safe * manner. */ if (copyBytes >= len) { success = TRUE; } else { if (encoding == STRING_ENCODING_UTF8) { copyBytes = CodeSet_Utf8FindCodePointBoundary(destBuffer, copyBytes); } } ((char*)destBuffer)[copyBytes] = '\0'; } break; case STRING_ENCODING_UTF16_LE: { char *utf16Buf; size_t utf16BufLen; if (!CodeSet_Utf8ToUtf16le(utf8Str, strlen(utf8Str), &utf16Buf, &utf16BufLen)) { // input should be valid UTF-8, no conversion error possible ASSERT_MEM_ALLOC(FALSE); break; } copyBytes = MIN(utf16BufLen, maxLengthInBytes - 2); memcpy(destBuffer, utf16Buf, copyBytes); copyBytes = CodeSet_Utf16FindCodePointBoundary(destBuffer, copyBytes); ((utf16_t*)destBuffer)[copyBytes / 2] = 0; free(utf16Buf); if (copyBytes >= utf16BufLen) { success = TRUE; } break; } default: { char *currentBuf; size_t currentBufSize; if (!CodeSet_GenericToGeneric("UTF-8", utf8Str, strlen(utf8Str), Unicode_EncodingEnumToName(encoding), CSGTG_NORMAL, ¤tBuf, ¤tBufSize)) { // XXX can't distinguish error cause break; } copyBytes = MIN(currentBufSize, maxLengthInBytes - 1); memcpy(destBuffer, currentBuf, copyBytes); free(currentBuf); /* * XXX this isn't quite correct, we still need to truncate on * a code point boundary, based on the current encoding type, * rather than just null terminate blindly. */ ((char*)destBuffer)[copyBytes] = 0; if (copyBytes >= currentBufSize) { success = TRUE; } } break; } if (retLength) { *retLength = copyBytes; } return success; }