/*
 * Append one delimiter code point to the delimiter map 'ctx'.
 *
 * Returns idn_invalid_codepoint when 'delimiter' is zero, greater than
 * UNICODE_MAX, or a surrogate.  When the delimiter array is full its
 * capacity is doubled with realloc(); if that fails idn_nomemory is
 * returned and the map is left as it was.  Otherwise the delimiter is
 * stored at the end of the array and idn_success is returned.
 */
idn_result_t
idn_delimitermap_add(idn_delimitermap_t ctx, unsigned long delimiter) {
	idn_result_t r = idn_success;
	unsigned long *grown;

	assert(ctx != NULL && ctx->ndelimiters <= ctx->delimiter_size);

	TRACE(("idn_delimitermap_add(delimiter=\\x%04lx)\n", delimiter));

	if (delimiter == 0 || delimiter > UNICODE_MAX ||
	    IS_SURROGATE_HIGH(delimiter) || IS_SURROGATE_LOW(delimiter)) {
		r = idn_invalid_codepoint;
	} else {
		if (ctx->ndelimiters == ctx->delimiter_size) {
			/*
			 * Array is full: double it.  The result goes into a
			 * temporary so the old array survives a failure.
			 */
			grown = realloc(ctx->delimiters,
					sizeof(*grown) *
					ctx->delimiter_size * 2);
			if (grown == NULL) {
				r = idn_nomemory;
			} else {
				ctx->delimiters = grown;
				ctx->delimiter_size *= 2;
			}
		}
		if (r == idn_success)
			ctx->delimiters[ctx->ndelimiters++] = delimiter;
	}

	TRACE(("idn_delimitermap_add(): %s\n", idn_result_tostring(r)));
	return (r);
}
idn_result_t idn_ucs4_ucs4toutf16(const unsigned long *ucs4, unsigned short *utf16, size_t tolen) { unsigned short *utf16p = utf16; unsigned long v; idn_result_t r; TRACE(("idn_ucs4_ucs4toutf16(ucs4=\"%s\", tolen=%d)\n", idn__debug_ucs4xstring(ucs4, 50), (int)tolen)); while (*ucs4 != '\0') { v = *ucs4++; if (IS_SURROGATE_LOW(v) || IS_SURROGATE_HIGH(v)) { WARNING(("idn_ucs4_ucs4toutf16: UCS4 string contains " "surrogate pair\n")); r = idn_invalid_encoding; goto ret; } else if (v > 0xffff) { /* Convert to surrogate pair */ if (v >= 0x110000) { r = idn_invalid_encoding; goto ret; } if (tolen < 2) { r = idn_buffer_overflow; goto ret; } *utf16p++ = SURROGATE_HIGH(v); *utf16p++ = SURROGATE_LOW(v); tolen -= 2; } else { if (tolen < 1) { r = idn_buffer_overflow; goto ret; } *utf16p++ = v; tolen--; } } if (tolen < 1) { r = idn_buffer_overflow; goto ret; } *utf16p = '\0'; r = idn_success; ret: if (r == idn_success) { TRACE(("idn_ucs4_ucs4toutf16(): success (utf16=\"%s\")\n", idn__debug_utf16xstring(utf16, 50))); } else { TRACE(("idn_ucs4_ucs4toutf16(): %s\n", idn_result_tostring(r))); } return (r); }
idn_result_t idn_ucs4_utf16toucs4(const unsigned short *utf16, unsigned long *ucs4, size_t tolen) { unsigned long *ucs4p = ucs4; unsigned short v0, v1; idn_result_t r; TRACE(("idn_ucs4_utf16toucs4(utf16=\"%s\", tolen=%d)\n", idn__debug_utf16xstring(utf16, 50), (int)tolen)); while (*utf16 != '\0') { v0 = *utf16; if (tolen < 1) { r = idn_buffer_overflow; goto ret; } if (IS_SURROGATE_HIGH(v0)) { v1 = *(utf16 + 1); if (!IS_SURROGATE_LOW(v1)) { WARNING(("idn_ucs4_utf16toucs4: " "corrupted surrogate pair\n")); r = idn_invalid_encoding; goto ret; } *ucs4p++ = COMBINE_SURROGATE(v0, v1); tolen--; utf16 += 2; } else { *ucs4p++ = v0; tolen--; utf16++; } } if (tolen < 1) { r = idn_buffer_overflow; goto ret; } *ucs4p = '\0'; r = idn_success; ret: if (r == idn_success) { TRACE(("idn_ucs4_utf16toucs4(): success (ucs4=\"%s\")\n", idn__debug_ucs4xstring(ucs4, 50))); } else { TRACE(("idn_ucs4_utf16toucs4(): %s\n", idn_result_tostring(r))); } return (r); }
idn_result_t idn_ucs4_ucs4toutf8(const unsigned long *ucs4, char *utf8, size_t tolen) { unsigned char *utf8p = (unsigned char *)utf8; unsigned long v; int width; int mask; int offset; idn_result_t r; TRACE(("idn_ucs4_ucs4toutf8(ucs4=\"%s\", tolen=%d)\n", idn__debug_ucs4xstring(ucs4, 50), (int)tolen)); while (*ucs4 != '\0') { v = *ucs4++; if (IS_SURROGATE_LOW(v) || IS_SURROGATE_HIGH(v)) { WARNING(("idn_ucs4_ucs4toutf8: UCS4 string contains " "surrogate pair\n")); r = idn_invalid_encoding; goto ret; } if (v < 0x80) { mask = 0; width = 1; } else if (v < 0x800) { mask = 0xc0; width = 2; } else if (v < 0x10000) { mask = 0xe0; width = 3; } else if (v < 0x200000) { mask = 0xf0; width = 4; } else if (v < 0x4000000) { mask = 0xf8; width = 5; } else if (v < 0x80000000) { mask = 0xfc; width = 6; } else { WARNING(("idn_ucs4_ucs4toutf8: invalid character\n")); r = idn_invalid_encoding; goto ret; } if (tolen < width) { r = idn_buffer_overflow; goto ret; } offset = 6 * (width - 1); *utf8p++ = (v >> offset) | mask; mask = 0x80; while (offset > 0) { offset -= 6; *utf8p++ = ((v >> offset) & 0x3f) | mask; } tolen -= width; } if (tolen < 1) { r = idn_buffer_overflow; goto ret; } *utf8p = '\0'; r = idn_success; ret: if (r == idn_success) { TRACE(("idn_ucs4_ucs4toutf8(): success (utf8=\"%s\")\n", idn__debug_xstring(utf8, 50))); } else { TRACE(("idn_ucs4_ucs4toutf8(): %s\n", idn_result_tostring(r))); } return (r); }
idn_result_t idn_ucs4_utf8toucs4(const char *utf8, unsigned long *ucs4, size_t tolen) { const unsigned char *utf8p = (const unsigned char *)utf8; unsigned long *ucs4p = ucs4; unsigned long v, min; unsigned char c; int width; int i; idn_result_t r; TRACE(("idn_ucs4_utf8toucs4(utf8=\"%s\", tolen=%d)\n", idn__debug_xstring(utf8, 50), (int)tolen)); while(*utf8p != '\0') { c = *utf8p++; if (c < 0x80) { v = c; min = 0; width = 1; } else if (c < 0xc0) { WARNING(("idn_ucs4_utf8toucs4: invalid character\n")); r = idn_invalid_encoding; goto ret; } else if (c < 0xe0) { v = c & 0x1f; min = 0x80; width = 2; } else if (c < 0xf0) { v = c & 0x0f; min = 0x800; width = 3; } else if (c < 0xf8) { v = c & 0x07; min = 0x10000; width = 4; } else if (c < 0xfc) { v = c & 0x03; min = 0x200000; width = 5; } else if (c < 0xfe) { v = c & 0x01; min = 0x4000000; width = 6; } else { WARNING(("idn_ucs4_utf8toucs4: invalid character\n")); r = idn_invalid_encoding; goto ret; } for (i = width - 1; i > 0; i--) { c = *utf8p++; if (c < 0x80 || 0xc0 <= c) { WARNING(("idn_ucs4_utf8toucs4: " "invalid character\n")); r = idn_invalid_encoding; goto ret; } v = (v << 6) | (c & 0x3f); } if (v < min) { WARNING(("idn_ucs4_utf8toucs4: invalid character\n")); r = idn_invalid_encoding; goto ret; } if (IS_SURROGATE_LOW(v) || IS_SURROGATE_HIGH(v)) { WARNING(("idn_ucs4_utf8toucs4: UTF-8 string contains " "surrogate pair\n")); r = idn_invalid_encoding; goto ret; } if (tolen < 1) { r = idn_buffer_overflow; goto ret; } tolen--; *ucs4p++ = v; } if (tolen < 1) { r = idn_buffer_overflow; goto ret; } *ucs4p = '\0'; r = idn_success; ret: if (r == idn_success) { TRACE(("idn_ucs4_utf8toucs4(): success (ucs4=\"%s\")\n", idn__debug_ucs4xstring(ucs4, 50))); } else { TRACE(("idn_ucs4_utf8toucs4(): %s\n", idn_result_tostring(r))); } return (r); }