/*
 * Apply the local (TLD-selected) mapping to one label.
 *
 * The map selector is fetched from 'ctx'; if there is none the label is
 * left untouched and idn_success is returned.  When the label's TLD
 * pointer is the label name itself, the selector's default TLD is used
 * instead.  The mapped name is built in a heap buffer that is doubled
 * each time the mapper reports idn_buffer_overflow, and is then stored
 * back with labellist_setname().
 *
 * Returns idn_nomemory when the buffer cannot be (re)allocated, or any
 * other error reported by the mapper or by labellist_setname().
 */
static idn_result_t
label_localmap(idn_resconf_t ctx, labellist_t label) {
	const unsigned long *src = labellist_getname(label);
	const unsigned long *tld_name = labellist_gettldname(label);
	unsigned long *mapped = NULL;
	size_t capacity;
	idn_mapselector_t selector;
	idn_result_t status;

	TRACE(("res localmap(label=\"%s\", tld=\"%s\")\n",
	       idn__debug_ucs4xstring(src, 50),
	       idn__debug_ucs4xstring(tld_name, 50)));

	selector = idn_resconf_getlocalmapselector(ctx);
	if (selector == NULL) {
		/* No local mapping configured: nothing to do. */
		status = idn_success;
		goto done;
	}

	if (tld_name == src)
		tld_name = idn_mapselector_getdefaulttld();

	/* Start with the input length plus NUL plus 15 for margin. */
	for (capacity = idn_ucs4_strlen(src) + 1 + 15; ; capacity *= 2) {
		unsigned long *grown;

		grown = (unsigned long *)realloc(mapped,
						 sizeof(long) * capacity);
		if (grown == NULL) {
			status = idn_nomemory;
			goto done;
		}
		mapped = grown;

		status = idn_mapselector_map2(selector, src, tld_name,
					      mapped, capacity);
		if (status != idn_buffer_overflow)
			break;
	}

	if (status == idn_success)
		status = labellist_setname(label, mapped);

done:
	if (status != idn_success) {
		TRACE(("res localmap(): %s\n", idn_result_tostring(status)));
	} else {
		TRACE(("res localmap(): success (label=\"%s\")\n",
		       idn__debug_ucs4xstring(labellist_getname(label), 50)));
	}
	if (selector != NULL)
		idn_mapselector_destroy(selector);
	free(mapped);
	return (status);
}
/*
 * Normalize one label with the normalizer configured in 'ctx'.
 *
 * If no normalizer is configured the label is left as is and
 * idn_success is returned.  Otherwise the name is normalized into a
 * heap buffer, which is doubled whenever the normalizer reports
 * idn_buffer_overflow, and the result replaces the label name.
 *
 * Returns idn_nomemory if the buffer cannot be (re)allocated, or any
 * other error reported by the normalizer or by labellist_setname().
 */
static idn_result_t
label_normalize(idn_resconf_t ctx, labellist_t label) {
	const unsigned long *from;
	unsigned long *to = NULL;
	size_t to_length;
	idn_normalizer_t normalizer;
	idn_result_t r;

	from = labellist_getname(label);
	/* The trace used to misspell the function name ("normalzie"). */
	TRACE(("res normalize(label=\"%s\")\n",
	       idn__debug_ucs4xstring(from, 50)));

	normalizer = idn_resconf_getnormalizer(ctx);
	if (normalizer == NULL) {
		r = idn_success;
		goto ret;
	}

	to_length = idn_ucs4_strlen(from) + 1 + 15;  /* 15 for margin */

	for (;;) {
		unsigned long *new_buffer;

		/* Keep 'to' valid if realloc() fails so it is freed below. */
		new_buffer = (unsigned long *)
			     realloc(to, sizeof(long) * to_length);
		if (new_buffer == NULL) {
			r = idn_nomemory;
			goto ret;
		}
		to = new_buffer;

		r = idn_normalizer_normalize(normalizer, from, to, to_length);
		if (r == idn_success)
			break;
		else if (r != idn_buffer_overflow)
			goto ret;
		to_length *= 2;
	}

	r = labellist_setname(label, to);

ret:
	if (r == idn_success) {
		TRACE(("res normalize(): success (label=\"%s\")\n",
		       idn__debug_ucs4xstring(labellist_getname(label), 50)));
	} else {
		TRACE(("res normalize(): %s\n", idn_result_tostring(r)));
	}
	if (normalizer != NULL)
		idn_normalizer_destroy(normalizer);
	free(to);
	return (r);
}
/*
 * Convert the local-encoding string 'from' to UCS4, writing at most
 * 'tolen' elements into 'to'.
 *
 * The converter's "to UCS4" side is opened on first use through
 * ops->opentoucs4; the opened flag is set only when that call succeeds.
 * Returns the result of the open call if it fails, otherwise the result
 * of ops->convtoucs4.
 */
idn_result_t
idn_converter_convtoucs4(idn_converter_t ctx, const char *from,
			 unsigned long *to, size_t tolen) {
	idn_result_t status = idn_success;

	assert(ctx != NULL && from != NULL && to != NULL);

	TRACE(("idn_converter_convtoucs4(ctx=%s, from=\"%s\", tolen=%d)\n",
	       ctx->local_encoding_name, idn__debug_xstring(from, 50),
	       (int)tolen));

	if (!ctx->opened_convtoucs4) {
		status = (*ctx->ops->opentoucs4)(ctx, &(ctx->private_data));
		if (status == idn_success)
			ctx->opened_convtoucs4 = 1;
	}

	if (status == idn_success) {
		status = (*ctx->ops->convtoucs4)(ctx, ctx->private_data,
						 from, to, tolen);
	}

	if (status != idn_success) {
		TRACE(("idn_converter_convtoucs4(): %s\n",
		       idn_result_tostring(status)));
	} else {
		TRACE(("idn_converter_convtoucs4(): success (to=\"%s\")\n",
		       idn__debug_ucs4xstring(to, 50)));
	}
	return (status);
}
/*
 * Scan 'str' for the first character that is a member of the checker's
 * code point set.
 *
 * On success '*found' points at that character inside 'str', or is NULL
 * when no character of the string is in the set.  If the set lookup
 * itself fails, its error is returned and '*found' is not written.
 */
idn_result_t
idn__filechecker_lookup(idn__filechecker_t ctx, const unsigned long *str,
			const unsigned long **found) {
	const unsigned long *cursor;

	assert(ctx != NULL && str != NULL);

	TRACE(("idn__filechecker_lookup(str=\"%s\")\n",
	       idn__debug_ucs4xstring(str, 50)));

	for (cursor = str; *cursor != '\0'; cursor++) {
		int member;
		idn_result_t status;

		status = idn_ucsset_lookup(ctx->set, *cursor, &member);
		if (status != idn_success)
			return (status);
		if (member) {
			/* Found. */
			*found = cursor;
			return (idn_success);
		}
	}

	*found = NULL;
	return (idn_success);
}
/*
 * Run the bidi checker configured in 'ctx' over one label.
 *
 * Returns idn_success when no checker is configured or when the checker
 * reports no offending character, idn_prohibited when it reports one,
 * and the checker's own error code if the lookup fails.
 */
static idn_result_t
label_bidicheck(idn_resconf_t ctx, labellist_t label) {
	const unsigned long *name = labellist_getname(label);
	const unsigned long *offender;
	idn_checker_t checker;
	idn_result_t status;

	TRACE(("res bidicheck(label=\"%s\")\n",
	       idn__debug_ucs4xstring(name, 50)));

	checker = idn_resconf_getbidichecker(ctx);
	if (checker != NULL) {
		status = idn_checker_lookup(checker, name, &offender);
		idn_checker_destroy(checker);
		if (status == idn_success && offender != NULL)
			status = idn_prohibited;
	} else {
		status = idn_success;
	}

	TRACE(("res bidicheck(): %s\n", idn_result_tostring(status)));
	return (status);
}
idn_result_t idn_ucs4_ucs4toutf16(const unsigned long *ucs4, unsigned short *utf16, size_t tolen) { unsigned short *utf16p = utf16; unsigned long v; idn_result_t r; TRACE(("idn_ucs4_ucs4toutf16(ucs4=\"%s\", tolen=%d)\n", idn__debug_ucs4xstring(ucs4, 50), (int)tolen)); while (*ucs4 != '\0') { v = *ucs4++; if (IS_SURROGATE_LOW(v) || IS_SURROGATE_HIGH(v)) { WARNING(("idn_ucs4_ucs4toutf16: UCS4 string contains " "surrogate pair\n")); r = idn_invalid_encoding; goto ret; } else if (v > 0xffff) { /* Convert to surrogate pair */ if (v >= 0x110000) { r = idn_invalid_encoding; goto ret; } if (tolen < 2) { r = idn_buffer_overflow; goto ret; } *utf16p++ = SURROGATE_HIGH(v); *utf16p++ = SURROGATE_LOW(v); tolen -= 2; } else { if (tolen < 1) { r = idn_buffer_overflow; goto ret; } *utf16p++ = v; tolen--; } } if (tolen < 1) { r = idn_buffer_overflow; goto ret; } *utf16p = '\0'; r = idn_success; ret: if (r == idn_success) { TRACE(("idn_ucs4_ucs4toutf16(): success (utf16=\"%s\")\n", idn__debug_utf16xstring(utf16, 50))); } else { TRACE(("idn_ucs4_ucs4toutf16(): %s\n", idn_result_tostring(r))); } return (r); }
/*
 * Check 'str' against the handle's "unassigned" procedure.
 *
 * This is a thin wrapper that forwards to idn_nameprep_check() with
 * handle->unassigned_proc; the result and '*found' are whatever that
 * function produces.
 */
idn_result_t
idn_nameprep_isunassigned(idn_nameprep_t handle, const PRUint32 *str,
			  const PRUint32 **found) {
	assert(handle != NULL && str != NULL && found != NULL);

	/*
	 * The format previously contained a single %s for two arguments,
	 * so the version was printed in place of the string and the string
	 * itself was dropped.  Use the same layout as the sibling
	 * idn_nameprep_isprohibited() trace.
	 */
	TRACE(("idn_nameprep_isunassigned(ctx=%s, str=\"%s\")\n",
	       handle->version, idn__debug_ucs4xstring(str, 50)));

	return (idn_nameprep_check(handle->unassigned_proc, str, found));
}
/*
 * Check 'str' against the handle's "prohibited" procedure.
 *
 * Forwards to idn_nameprep_check() with handle->prohibited_proc and
 * returns its result unchanged; '*found' is set by that function.
 */
idn_result_t
idn_nameprep_isprohibited(idn_nameprep_t handle, const unsigned long *str,
			  const unsigned long **found) {
	idn_result_t status;

	assert(handle != NULL && str != NULL && found != NULL);

	TRACE(("idn_nameprep_isprohibited(ctx=%s, str=\"%s\")\n",
	       handle->version, idn__debug_ucs4xstring(str, 50)));

	status = idn_nameprep_check(handle->prohibited_proc, str, found);
	return (status);
}
/*
 * Map the UCS4 string 'from' through the file-defined map in 'ctx',
 * writing a NUL-terminated result of at most 'tolen' elements to 'to'.
 *
 * Each input character is mapped into a temporary buffer, which is
 * enlarged and the character retried when idn_ucsmap_map() reports
 * idn_buffer_overflow.  The temporary contents are then appended to
 * 'to' (for both idn_success and idn_nomapping results).
 *
 * Returns idn_buffer_overflow if 'to' is too small, the error from
 * ucsbuf_grow() if the temporary buffer cannot be enlarged, or any
 * other error reported by idn_ucsmap_map().
 */
idn_result_t
idn__filemapper_map(idn__filemapper_t ctx, const unsigned long *from,
		    unsigned long *to, size_t tolen) {
	idn_result_t r = idn_success;
	ucsbuf_t ub;

	assert(ctx != NULL && from != NULL && to != NULL);

	TRACE(("idn__filemapper_map(from=\"%s\")\n",
	       idn__debug_ucs4xstring(from, 50)));

	/* Initialize temporary buffer. */
	ucsbuf_init(&ub);

	while (*from != '\0') {
		/* Try mapping. */
		r = idn_ucsmap_map(ctx->map, *from, ub.ucs, ub.size, &ub.len);
		switch (r) {
		case idn_buffer_overflow:
			/*
			 * Temporary buffer too small.  Enlarge and retry.
			 * A failed grow must abort the whole mapping: a
			 * plain 'break' here would only leave the switch,
			 * skip this character and lose the error.
			 */
			r = ucsbuf_grow(&ub);
			if (r != idn_success)
				goto ret;
			continue;
		case idn_nomapping:
			/* There is no mapping. */
			r = idn_success;
			/* fallthrough */
		case idn_success:
			if (tolen < ub.len) {
				r = idn_buffer_overflow;
				goto ret;
			}
			memcpy(to, ub.ucs, sizeof(*to) * ub.len);
			to += ub.len;
			tolen -= ub.len;
			break;
		default:
			goto ret;
		}
		from++;
	}

ret:
	ucsbuf_free(&ub);

	if (r == idn_success) {
		/* Terminate with NUL. */
		if (tolen == 0)
			return (idn_buffer_overflow);
		*to = '\0';
	}

	return (r);
}
idn_result_t idn_ucs4_utf16toucs4(const unsigned short *utf16, unsigned long *ucs4, size_t tolen) { unsigned long *ucs4p = ucs4; unsigned short v0, v1; idn_result_t r; TRACE(("idn_ucs4_utf16toucs4(utf16=\"%s\", tolen=%d)\n", idn__debug_utf16xstring(utf16, 50), (int)tolen)); while (*utf16 != '\0') { v0 = *utf16; if (tolen < 1) { r = idn_buffer_overflow; goto ret; } if (IS_SURROGATE_HIGH(v0)) { v1 = *(utf16 + 1); if (!IS_SURROGATE_LOW(v1)) { WARNING(("idn_ucs4_utf16toucs4: " "corrupted surrogate pair\n")); r = idn_invalid_encoding; goto ret; } *ucs4p++ = COMBINE_SURROGATE(v0, v1); tolen--; utf16 += 2; } else { *ucs4p++ = v0; tolen--; utf16++; } } if (tolen < 1) { r = idn_buffer_overflow; goto ret; } *ucs4p = '\0'; r = idn_success; ret: if (r == idn_success) { TRACE(("idn_ucs4_utf16toucs4(): success (ucs4=\"%s\")\n", idn__debug_ucs4xstring(ucs4, 50))); } else { TRACE(("idn_ucs4_utf16toucs4(): %s\n", idn_result_tostring(r))); } return (r); }
/*
 * Check the encoded length of a label when the IDN encoding is not
 * handled as ACE by the caller.
 *
 * The label is converted with the IDN converter from 'ctx' (or to
 * UTF-8 when there is no such converter) into a heap buffer that is
 * doubled on idn_buffer_overflow.  The byte length of the result must
 * be between 1 and MAX_LABEL_LENGTH, otherwise idn_invalid_length is
 * returned.  Conversion and allocation errors are passed through.
 */
static idn_result_t
label_lencheck_nonace(idn_resconf_t ctx, labellist_t label) {
	idn_converter_t conv;
	const unsigned long *name = labellist_getname(label);
	char *encoded = NULL;
	size_t capacity;
	size_t encoded_len;
	idn_result_t status;

	TRACE(("res lencheck(label=\"%s\")\n",
	       idn__debug_ucs4xstring(name, 50)));

	capacity = idn_ucs4_strlen(name) * 4 + 16; /* 16 for margin */
	conv = idn_resconf_getidnconverter(ctx);

	for (;; capacity *= 2) {
		void *grown = realloc(encoded, sizeof(*encoded) * capacity);

		if (grown == NULL) {
			status = idn_nomemory;
			goto done;
		}
		encoded = (char *)grown;

		if (conv == NULL) {
			status = idn_ucs4_ucs4toutf8(name, encoded, capacity);
		} else {
			status = idn_converter_convfromucs4(conv, name,
							    encoded, capacity);
		}
		if (status != idn_buffer_overflow)
			break;
	}
	if (status != idn_success)
		goto done;

	encoded_len = strlen(encoded);
	if (encoded_len == 0 || encoded_len > MAX_LABEL_LENGTH)
		status = idn_invalid_length;
	else
		status = idn_success;

done:
	TRACE(("res lencheck(): %s\n", idn_result_tostring(status)));
	if (conv != NULL)
		idn_converter_destroy(conv);
	free(encoded);
	return (status);
}
/*
 * Verify that a decoded label can be represented in the local encoding.
 *
 * The label is converted with the local converter from 'ctx' into a
 * scratch buffer (doubled on idn_buffer_overflow); the converted text
 * itself is discarded.  If the converter reports idn_nomapping the
 * label is re-encoded with label_idnencode_ace() instead, and that
 * call's result is returned.  With no local converter the check
 * trivially succeeds.
 */
static idn_result_t
label_localdecodecheck(idn_resconf_t ctx, labellist_t label) {
	idn_converter_t conv = NULL;
	const unsigned long *name = labellist_getname(label);
	char *scratch = NULL;
	size_t capacity = idn_ucs4_strlen(name) + 1 + 15; /* 15 for margin */
	idn_result_t status;

	TRACE(("res ucs4tolocal_check(label=\"%s\")\n",
	       idn__debug_ucs4xstring(name, 50)));

	conv = idn_resconf_getlocalconverter(ctx);
	if (conv == NULL) {
		status = idn_success;
		goto done;
	}

	for (;; capacity *= 2) {
		char *grown = (char *)realloc(scratch, capacity);

		if (grown == NULL) {
			status = idn_nomemory;
			goto done;
		}
		scratch = grown;

		status = idn_converter_convfromucs4(conv, name, scratch,
						    capacity);
		if (status != idn_buffer_overflow)
			break;
	}

	/* Not representable locally: fall back to the ACE form. */
	if (status == idn_nomapping)
		status = label_idnencode_ace(ctx, label);

done:
	TRACE(("res ucs4tolocal_check(): %s\n", idn_result_tostring(status)));
	if (conv != NULL)
		idn_converter_destroy(conv);
	free(scratch);
	return (status);
}
/*
 * Check the length of a label measured in UCS4 characters.
 *
 * Returns idn_invalid_length when the label is empty or longer than
 * MAX_LABEL_LENGTH characters, idn_success otherwise.  'ctx' is not
 * used.
 */
static idn_result_t
label_lencheck_ace(idn_resconf_t ctx, labellist_t label) {
	const unsigned long *name = labellist_getname(label);
	size_t nchars = idn_ucs4_strlen(name);
	idn_result_t status;

	TRACE(("res lencheck(label=\"%s\")\n",
	       idn__debug_ucs4xstring(name, 50)));

	if (nchars != 0 && !(nchars > MAX_LABEL_LENGTH))
		status = idn_success;
	else
		status = idn_invalid_length;

	TRACE(("res lencheck(): %s\n", idn_result_tostring(status)));
	return (status);
}
/*
 * Run every check scheme registered in 'ctx' over 'ucs4', in order.
 *
 * '*found' is reset to NULL first.  The scan stops at the first scheme
 * that either fails (its error is returned) or sets '*found' to a
 * non-NULL pointer.  When all schemes pass, idn_success is returned
 * with '*found' still NULL.
 */
idn_result_t
idn_checker_lookup(idn_checker_t ctx, const unsigned long *ucs4,
		   const unsigned long **found) {
	idn_result_t status = idn_success;
	int n;

	assert(scheme_hash != NULL);
	assert(ctx != NULL && ucs4 != NULL && found != NULL);

	TRACE(("idn_checker_lookup(ucs4=\"%s\")\n",
	       idn__debug_ucs4xstring(ucs4, 50)));

	/*
	 * Lookup.
	 */
	*found = NULL;
	for (n = 0; n < ctx->nschemes; n++) {
		TRACE(("idn_checker_lookup(): lookup %s\n",
		       ctx->schemes[n].prefix));

		status = (ctx->schemes[n].lookup)(ctx->schemes[n].context,
						  ucs4, found);
		if (status != idn_success || *found != NULL)
			break;
	}

	if (*found != NULL) {
		TRACE(("idn_checker_lookup(): %s (found \\x%04lx)\n",
		       idn_result_tostring(status), **found));
	} else {
		TRACE(("idn_checker_lookup(): %s (not found)\n",
		       idn_result_tostring(status)));
	}
	return (status);
}
/*
 * Convert the UCS4 string 'from' to the converter's local encoding,
 * writing at most 'tolen' bytes into 'to'.
 *
 * The "from UCS4" side is opened on first use through
 * ops->openfromucs4.  After a successful conversion, if the converter
 * has the IDN_CONVERTER_RTCHECK flag set, roundtrip_check() is run and
 * its result is returned.
 */
idn_result_t
idn_converter_convfromucs4(idn_converter_t ctx, const unsigned long *from,
			   char *to, size_t tolen) {
	idn_result_t status;

	assert(ctx != NULL && from != NULL && to != NULL);

	TRACE(("idn_converter_convfromucs4(ctx=%s, from=\"%s\", tolen=%d)\n",
	       ctx->local_encoding_name, idn__debug_ucs4xstring(from, 50),
	       (int)tolen));

	if (!ctx->opened_convfromucs4) {
		status = (*ctx->ops->openfromucs4)(ctx, &(ctx->private_data));
		if (status != idn_success)
			goto done;
		ctx->opened_convfromucs4 = 1;
	}

	status = (*ctx->ops->convfromucs4)(ctx, ctx->private_data,
					   from, to, tolen);

	if (status == idn_success &&
	    (ctx->flags & IDN_CONVERTER_RTCHECK) != 0) {
		status = roundtrip_check(ctx, from, to);
	}

done:
	if (status != idn_success) {
		TRACE(("idn_converter_convfromucs4(): %s\n",
		       idn_result_tostring(status)));
	} else {
		TRACE(("idn_converter_convfromucs4(): success (to=\"%s\")\n",
		       idn__debug_xstring(to, 50)));
	}
	return (status);
}
/*
 * Check the ASCII characters of a label.
 *
 * Returns idn_prohibited when the label starts or ends with '-', or
 * when it contains a character at or below '\177' that is not a digit,
 * an ASCII letter or '-'.  Characters above '\177' are not examined.
 * 'ctx' is not used.
 */
static idn_result_t
label_asccheck(idn_resconf_t ctx, labellist_t label) {
	const unsigned long *name = labellist_getname(label);
	const unsigned long *p;
	idn_result_t status = idn_prohibited;

	TRACE(("res asccheck(label=\"%s\")\n",
	       idn__debug_ucs4xstring(name, 50)));

	/* Leading hyphen. */
	if (*name == '-')
		goto done;

	for (p = name; *p != '\0'; p++) {
		unsigned long ch = *p;

		if (ch > '\177')
			continue;
		if (ch >= '0' && ch <= '9')
			continue;
		if (ch >= 'A' && ch <= 'Z')
			continue;
		if (ch >= 'a' && ch <= 'z')
			continue;
		if (ch == '-')
			continue;
		goto done;
	}

	/* Trailing hyphen; 'p' now points at the terminator. */
	if (p > name && p[-1] == '-')
		goto done;

	status = idn_success;

done:
	TRACE(("res asccheck(): %s\n", idn_result_tostring(status)));
	return (status);
}
/*
 * Replace every label delimiter in 'from' with '.' while copying the
 * string to 'to' ('tolen' elements including the terminating NUL).
 *
 * A character counts as a delimiter when it is one of the four default
 * delimiters listed below or one of ctx->delimiters.  Each input
 * character is read before the corresponding output element is
 * written.  Returns idn_buffer_overflow when 'to' is too small.
 */
idn_result_t
idn_delimitermap_map(idn_delimitermap_t ctx, const unsigned long *from,
		     unsigned long *to, size_t tolen) {
	/* default delimiters (label separators) from IDNA specification */
	static const unsigned long default_delimiters[] = {
		0x002e, /* full stop */
		0x3002, /* ideographic full stop */
		0xff0e, /* fullwidth full stop */
		0xff61, /* halfwidth ideographic full stop */
		0x0000
	};
	unsigned long *out_start = to;
	idn_result_t status;

	assert(ctx != NULL && from != NULL && to != NULL);

	TRACE(("idn_delimitermap_map(from=\"%s\", tolen=%d)\n",
	       idn__debug_ucs4xstring(from, 50), (int)tolen));

	/*
	 * Map.
	 */
	for (; *from != '\0'; from++, to++, tolen--) {
		int is_delimiter = 0;
		int k;

		if (tolen < 1) {
			status = idn_buffer_overflow;
			goto done;
		}

		for (k = 0; default_delimiters[k] != 0x0000; k++) {
			if (default_delimiters[k] == *from) {
				is_delimiter = 1;
				break;
			}
		}
		for (k = 0; !is_delimiter && k < ctx->ndelimiters; k++) {
			if (ctx->delimiters[k] == *from)
				is_delimiter = 1;
		}

		if (is_delimiter)
			*to = '.';
		else
			*to = *from;
	}

	if (tolen < 1) {
		status = idn_buffer_overflow;
		goto done;
	}
	*to = '\0';
	status = idn_success;

done:
	if (status != idn_success) {
		TRACE(("idn_delimitermap_map(): %s\n",
		       idn_result_tostring(status)));
	} else {
		TRACE(("idn_delimitermap_map(): success (to=\"%s\")\n",
		       idn__debug_ucs4xstring(out_start, 50)));
	}
	return (status);
}
idn_result_t idn_ucs4_ucs4toutf8(const unsigned long *ucs4, char *utf8, size_t tolen) { unsigned char *utf8p = (unsigned char *)utf8; unsigned long v; int width; int mask; int offset; idn_result_t r; TRACE(("idn_ucs4_ucs4toutf8(ucs4=\"%s\", tolen=%d)\n", idn__debug_ucs4xstring(ucs4, 50), (int)tolen)); while (*ucs4 != '\0') { v = *ucs4++; if (IS_SURROGATE_LOW(v) || IS_SURROGATE_HIGH(v)) { WARNING(("idn_ucs4_ucs4toutf8: UCS4 string contains " "surrogate pair\n")); r = idn_invalid_encoding; goto ret; } if (v < 0x80) { mask = 0; width = 1; } else if (v < 0x800) { mask = 0xc0; width = 2; } else if (v < 0x10000) { mask = 0xe0; width = 3; } else if (v < 0x200000) { mask = 0xf0; width = 4; } else if (v < 0x4000000) { mask = 0xf8; width = 5; } else if (v < 0x80000000) { mask = 0xfc; width = 6; } else { WARNING(("idn_ucs4_ucs4toutf8: invalid character\n")); r = idn_invalid_encoding; goto ret; } if (tolen < width) { r = idn_buffer_overflow; goto ret; } offset = 6 * (width - 1); *utf8p++ = (v >> offset) | mask; mask = 0x80; while (offset > 0) { offset -= 6; *utf8p++ = ((v >> offset) & 0x3f) | mask; } tolen -= width; } if (tolen < 1) { r = idn_buffer_overflow; goto ret; } *utf8p = '\0'; r = idn_success; ret: if (r == idn_success) { TRACE(("idn_ucs4_ucs4toutf8(): success (utf8=\"%s\")\n", idn__debug_xstring(utf8, 50))); } else { TRACE(("idn_ucs4_ucs4toutf8(): %s\n", idn_result_tostring(r))); } return (r); }
idn_result_t idn_ucs4_utf8toucs4(const char *utf8, unsigned long *ucs4, size_t tolen) { const unsigned char *utf8p = (const unsigned char *)utf8; unsigned long *ucs4p = ucs4; unsigned long v, min; unsigned char c; int width; int i; idn_result_t r; TRACE(("idn_ucs4_utf8toucs4(utf8=\"%s\", tolen=%d)\n", idn__debug_xstring(utf8, 50), (int)tolen)); while(*utf8p != '\0') { c = *utf8p++; if (c < 0x80) { v = c; min = 0; width = 1; } else if (c < 0xc0) { WARNING(("idn_ucs4_utf8toucs4: invalid character\n")); r = idn_invalid_encoding; goto ret; } else if (c < 0xe0) { v = c & 0x1f; min = 0x80; width = 2; } else if (c < 0xf0) { v = c & 0x0f; min = 0x800; width = 3; } else if (c < 0xf8) { v = c & 0x07; min = 0x10000; width = 4; } else if (c < 0xfc) { v = c & 0x03; min = 0x200000; width = 5; } else if (c < 0xfe) { v = c & 0x01; min = 0x4000000; width = 6; } else { WARNING(("idn_ucs4_utf8toucs4: invalid character\n")); r = idn_invalid_encoding; goto ret; } for (i = width - 1; i > 0; i--) { c = *utf8p++; if (c < 0x80 || 0xc0 <= c) { WARNING(("idn_ucs4_utf8toucs4: " "invalid character\n")); r = idn_invalid_encoding; goto ret; } v = (v << 6) | (c & 0x3f); } if (v < min) { WARNING(("idn_ucs4_utf8toucs4: invalid character\n")); r = idn_invalid_encoding; goto ret; } if (IS_SURROGATE_LOW(v) || IS_SURROGATE_HIGH(v)) { WARNING(("idn_ucs4_utf8toucs4: UTF-8 string contains " "surrogate pair\n")); r = idn_invalid_encoding; goto ret; } if (tolen < 1) { r = idn_buffer_overflow; goto ret; } tolen--; *ucs4p++ = v; } if (tolen < 1) { r = idn_buffer_overflow; goto ret; } *ucs4p = '\0'; r = idn_success; ret: if (r == idn_success) { TRACE(("idn_ucs4_utf8toucs4(): success (ucs4=\"%s\")\n", idn__debug_ucs4xstring(ucs4, 50))); } else { TRACE(("idn_ucs4_utf8toucs4(): %s\n", idn_result_tostring(r))); } return (r); }
/*
 * Decode a Punycode label 'from' into the UCS4 buffer 'to', which can
 * hold 'tolen' elements including the terminating NUL.
 *
 * An empty input yields an empty output.  Any other input must start
 * with IDN_PUNYCODE_PREFIX, otherwise idn_invalid_encoding is
 * returned.  The characters before the last '-' are copied verbatim;
 * the rest is decoded as a sequence of deltas, each of which inserts
 * one character into the output.
 *
 * Returns idn_buffer_overflow when the decoded string (plus NUL) does
 * not fit into 'to', idn_invalid_encoding when a delta cannot be
 * parsed.  'privdata' is not used.
 */
idn_result_t
idn__punycode_decode(idn_converter_t ctx, void *privdata,
		     const char *from, unsigned long *to, size_t tolen) {
	unsigned long *to_org = to;
	unsigned long c, idx;
	size_t prefixlen = strlen(IDN_PUNYCODE_PREFIX);
	size_t fromlen;
	size_t uidx, fidx, ucslen;
	int first, bias;
	idn_result_t r;

	assert(ctx != NULL);

	TRACE(("idn__punycode_decode(from=\"%s\", tolen=%d)\n",
	       idn__debug_xstring(from, 50), (int)tolen));

	if (!idn__util_asciihaveaceprefix(from, IDN_PUNYCODE_PREFIX)) {
		if (*from == '\0') {
			r = idn_ucs4_utf8toucs4(from, to, tolen);
			goto ret;
		}
		r = idn_invalid_encoding;
		goto ret;
	}
	from += prefixlen;
	fromlen = strlen(from);

	/*
	 * Find the last delimiter, and copy the characters
	 * before it verbatim.
	 */
	ucslen = 0;
	for (fidx = fromlen; fidx > 0; fidx--) {
		if (from[fidx - 1] == '-') {
			if (tolen < fidx) {
				r = idn_buffer_overflow;
				goto ret;
			}
			for (uidx = 0; uidx < fidx - 1; uidx++) {
				to[uidx] = from[uidx];
			}
			ucslen = uidx;
			/*
			 * Account for the copied characters.  From here on
			 * 'tolen' is the number of free elements left in
			 * 'to'; without this adjustment the insertions
			 * below could write past the end of the buffer.
			 * The check above guarantees tolen >= ucslen + 1.
			 */
			tolen -= ucslen;
			break;
		}
	}

	first = 1;
	bias = PUNYCODE_INITIAL_BIAS;
	c = PUNYCODE_INITIAL_N;
	idx = 0;
	while (fidx < fromlen) {
		int len;
		unsigned long delta;
		size_t i;

		len = punycode_getwc(from + fidx, fromlen - fidx, bias, &delta);
		if (len == 0) {
			r = idn_invalid_encoding;
			goto ret;
		}
		fidx += len;

		bias = punycode_update_bias(delta, ucslen + 1, first);
		first = 0;
		idx += delta;
		c += idx / (ucslen + 1);
		uidx = idx % (ucslen + 1);

		/* Insert 'c' at uidx. */
		if (tolen == 0) {
			r = idn_buffer_overflow;
			goto ret;
		}
		tolen--;
		for (i = ucslen; i > uidx; i--)
			to[i] = to[i - 1];
		to[uidx] = c;

		ucslen++;
		idx = uidx + 1;
	}

	/* Terminate with NUL. */
	if (tolen == 0) {
		r = idn_buffer_overflow;
		goto ret;
	}
	to[ucslen] = '\0';
	r = idn_success;

ret:
	if (r == idn_success) {
		TRACE(("idn__punycode_decode(): success (to=\"%s\")\n",
		       idn__debug_ucs4xstring(to_org, 50)));
	} else {
		TRACE(("idn__punycode_decode(): %s\n", idn_result_tostring(r)));
	}
	return (r);
}
/*
 * Encode the UCS4 label 'from' as Punycode into 'to', which can hold
 * 'tolen' bytes including the terminating NUL.
 *
 * An empty input yields an empty output.  An input that already starts
 * with IDN_PUNYCODE_PREFIX is rejected with idn_prohibited.  Otherwise
 * the prefix is written, followed by the basic (< 0x80) code points, a
 * '-' if there were any, and then the encoded deltas for the remaining
 * code points in increasing code point order.
 *
 * Returns idn_buffer_overflow when the output does not fit and
 * idn_failure when the input is longer than PUNYCODE_MAXINPUT.
 * 'privdata' is not used.
 */
idn_result_t
idn__punycode_encode(idn_converter_t ctx, void *privdata,
		     const unsigned long *from, char *to, size_t tolen) {
	char *to_org = to;
	unsigned long cur_code, next_code, delta;
	size_t prefixlen = strlen(IDN_PUNYCODE_PREFIX);
	size_t fromlen;
	size_t ucsdone;
	size_t toidx;
	int uidx, bias, first;
	idn_result_t r;

	assert(ctx != NULL);

	TRACE(("idn__punycode_encode(from=\"%s\", tolen=%d)\n",
	       idn__debug_ucs4xstring(from, 50), (int)tolen));

	if (*from == '\0') {
		r = idn_ucs4_ucs4toutf8(from, to, tolen);
		goto ret;
	} else if (idn__util_ucs4haveaceprefix(from, IDN_PUNYCODE_PREFIX)) {
		r = idn_prohibited;
		goto ret;
	}

	if (tolen < prefixlen) {
		r = idn_buffer_overflow;
		goto ret;
	}
	memcpy(to, IDN_PUNYCODE_PREFIX, prefixlen);
	to += prefixlen;
	tolen -= prefixlen;

	fromlen = idn_ucs4_strlen(from);

	/*
	 * If the input string is too long (actually too long to be sane),
	 * return failure in order to prevent possible overflow.
	 */
	if (fromlen > PUNYCODE_MAXINPUT) {
		/*
		 * The message has no conversion specifier, so no extra
		 * argument is passed (one used to be supplied and ignored).
		 */
		ERROR(("idn__punycode_encode(): "
		       "the input string is too long to convert Punycode\n"));
		r = idn_failure;
		goto ret;
	}

	ucsdone = 0;	/* number of characters processed */
	toidx = 0;

	/*
	 * First, pick up basic code points and copy them to 'to'.
	 */
	for (uidx = 0; uidx < fromlen; uidx++) {
		if (from[uidx] < 0x80) {
			if (toidx >= tolen) {
				r = idn_buffer_overflow;
				goto ret;
			}
			to[toidx++] = from[uidx];
			ucsdone++;
		}
	}

	/*
	 * If there are any basic code points, output a delimiter
	 * (hyphen-minus).
	 */
	if (toidx > 0) {
		if (toidx >= tolen) {
			r = idn_buffer_overflow;
			goto ret;
		}
		to[toidx++] = '-';
		to += toidx;
		tolen -= toidx;
	}

	/*
	 * Then encode non-basic characters.
	 */
	first = 1;
	cur_code = PUNYCODE_INITIAL_N;
	bias = PUNYCODE_INITIAL_BIAS;
	delta = 0;
	while (ucsdone < fromlen) {
		int limit = -1, rest;

		/*
		 * Find the smallest code point equal to or greater
		 * than 'cur_code'.  Also remember the index of the
		 * last occurrence of the code point.
		 */
		for (next_code = MAX_UCS, uidx = fromlen - 1;
		     uidx >= 0; uidx--) {
			if (from[uidx] >= cur_code && from[uidx] < next_code) {
				next_code = from[uidx];
				limit = uidx;
			}
		}
		/* There must be such code point. */
		assert(limit >= 0);

		delta += (next_code - cur_code) * (ucsdone + 1);
		cur_code = next_code;

		/*
		 * Scan the input string again, and encode characters
		 * whose code point is 'cur_code'.  Use 'limit' to avoid
		 * unnecessary scan.
		 */
		for (uidx = 0, rest = ucsdone; uidx <= limit; uidx++) {
			if (from[uidx] < cur_code) {
				delta++;
				rest--;
			} else if (from[uidx] == cur_code) {
				int sz = punycode_putwc(to, tolen, delta, bias);
				if (sz == 0) {
					r = idn_buffer_overflow;
					goto ret;
				}
				to += sz;
				tolen -= sz;
				ucsdone++;
				bias = punycode_update_bias(delta, ucsdone,
							    first);
				delta = 0;
				first = 0;
			}
		}
		delta += rest + 1;
		cur_code++;
	}

	/*
	 * Terminate with NUL.
	 */
	if (tolen <= 0) {
		r = idn_buffer_overflow;
		goto ret;
	}
	*to = '\0';
	r = idn_success;

ret:
	if (r == idn_success) {
		TRACE(("idn__punycode_encode(): success (to=\"%s\")\n",
		       idn__debug_xstring(to_org, 50)));
	} else {
		TRACE(("idn__punycode_encode(): %s\n", idn_result_tostring(r)));
	}
	return (r);
}
/*
 * Apply every normalization scheme registered in 'ctx' to 'from', in
 * order, leaving the final result in 'to' ('tolen' elements).
 *
 * With no schemes the input is simply copied.  Otherwise the output of
 * each scheme feeds the next one; intermediate results alternate
 * between two heap buffers, which are enlarged (doubled) and the scheme
 * retried when it reports idn_buffer_overflow.  The last scheme writes
 * directly into 'to', and an overflow there is returned to the caller.
 *
 * Returns idn_nomemory when an intermediate buffer cannot be allocated,
 * or the first other error reported by a scheme.
 */
idn_result_t
idn_normalizer_normalize(idn_normalizer_t ctx, const unsigned long *from,
			 unsigned long *to, size_t tolen) {
	idn_result_t status;
	unsigned long *input, *output;
	unsigned long *scratch[2] = {NULL, NULL};
	size_t scratch_len[2] = {0, 0};
	size_t outlen;
	int slot;
	int step;

	assert(scheme_hash != NULL);
	assert(ctx != NULL && from != NULL && to != NULL);

	TRACE(("idn_normalizer_normalize(from=\"%s\", tolen=%d)\n",
	       idn__debug_ucs4xstring(from, 50), (int)tolen));

	if (ctx->nschemes <= 0) {
		/* Nothing to apply: plain copy. */
		if (tolen < idn_ucs4_strlen(from) + 1) {
			status = idn_buffer_overflow;
		} else {
			idn_ucs4_strcpy(to, from);
			status = idn_success;
		}
		goto finish;
	}

	/*
	 * Normalize.
	 */
	input = (void *)from;
	outlen = idn_ucs4_strlen(from) + 1;

	for (step = 0; step < ctx->nschemes; ) {
		TRACE(("idn_normalizer_normalize(): normalize %s\n",
		       ctx->schemes[step]->name));

		/*
		 * Choose destination area to restore the result of a mapping.
		 */
		if (step + 1 == ctx->nschemes) {
			output = to;
			outlen = tolen;
		} else {
			/* Use whichever scratch buffer is not the input. */
			slot = (input == scratch[0]) ? 1 : 0;

			if (scratch_len[slot] < outlen) {
				void *grown;

				grown = realloc(scratch[slot],
						sizeof(long) * outlen);
				if (grown == NULL) {
					status = idn_nomemory;
					goto finish;
				}
				scratch[slot] = (unsigned long *)grown;
				scratch_len[slot] = outlen;
			}

			output = scratch[slot];
			outlen = scratch_len[slot];
		}

		/*
		 * Perform i-th normalization scheme.
		 * If buffer size is not enough, we double it and try again.
		 */
		status = (ctx->schemes[step]->proc)(input, output, outlen);
		if (status == idn_buffer_overflow && output != to) {
			outlen *= 2;
			continue;
		}
		if (status != idn_success)
			goto finish;

		input = output;
		step++;
	}

	status = idn_success;

finish:
	free(scratch[0]);
	free(scratch[1]);
	if (status != idn_success) {
		TRACE(("idn_normalizer_normalize(): %s\n",
		       idn_result_tostring(status)));
	} else {
		TRACE(("idn_normalizer_normalize(): success (to=\"%s\")\n",
		       idn__debug_ucs4xstring(to, 50)));
	}
	return (status);
}
/*
 * Verify a UCS4 -> local conversion by converting the result back.
 *
 * 'from' is the original UCS4 string and 'to' the converted local
 * string.  'to' is converted back to UCS4 with
 * idn_converter_convtoucs4() and compared with 'from'.
 *
 * Returns idn_success when both strings are identical, idn_nomapping
 * when they differ or when the back conversion reports
 * idn_invalid_encoding or idn_buffer_overflow, idn_nomemory when the
 * work buffer cannot be allocated, and any other back-conversion error
 * unchanged.
 */
static idn_result_t
roundtrip_check(idn_converter_t ctx, const unsigned long *from,
		const char *to)
{
	/*
	 * One problem with iconv() convertion is that
	 * iconv() doesn't signal an error if the input
	 * string contains characters which are valid but
	 * do not have mapping to the output codeset.
	 * (the behavior of iconv() for that case is defined as
	 * `implementation dependent')
	 * One way to check this case is to perform round-trip
	 * conversion and see if it is same as the original string.
	 */
	idn_result_t r;
	unsigned long *back;
	unsigned long backbuf[256];
	const size_t backbuf_count = sizeof(backbuf) / sizeof(backbuf[0]);
	size_t fromlen;
	size_t backlen;

	TRACE(("idn_converter_convert: round-trip checking (from=\"%s\")\n",
	       idn__debug_ucs4xstring(from, 50)));

	/* Allocate enough buffer. */
	fromlen = idn_ucs4_strlen(from) + 1;
	if (fromlen <= backbuf_count) {
		/*
		 * 'backlen' is an element count, not a byte count:
		 * passing sizeof(backbuf) here would overstate the
		 * capacity of the stack buffer and allow an overflow.
		 */
		backlen = backbuf_count;
		back = backbuf;
	} else {
		backlen = fromlen;
		back = (unsigned long *)malloc(backlen * sizeof(*back));
		if (back == NULL)
			return (idn_nomemory);
	}

	/*
	 * Perform backward conversion.
	 */
	r = idn_converter_convtoucs4(ctx, to, back, backlen);
	switch (r) {
	case idn_success:
		if (memcmp(back, from, sizeof(*from) * fromlen) != 0)
			r = idn_nomapping;
		break;
	case idn_invalid_encoding:
	case idn_buffer_overflow:
		r = idn_nomapping;
		break;
	default:
		break;
	}

	if (back != backbuf)
		free(back);

	if (r != idn_success) {
		TRACE(("round-trip check failed: %s\n",
		       idn_result_tostring(r)));
	}

	return (r);
}
/*
 * Apply the handle's mapping procedure to every character of 'from',
 * writing a NUL-terminated result of at most 'tolen' elements to 'to'.
 *
 * Characters for which map_proc returns NULL, and characters above
 * UNICODE_MAX, are copied verbatim.  Otherwise the returned data is a
 * length byte followed by that many bytes, which are assembled into
 * output elements four bytes at a time, least significant byte first.
 *
 * Returns idn_invalid_codepoint for a character above UCS_MAX and
 * idn_buffer_overflow when the output does not fit.
 */
idn_result_t
idn_nameprep_map(idn_nameprep_t handle, const PRUint32 *from,
		 PRUint32 *to, size_t tolen) {
	assert(handle != NULL && from != NULL && to != NULL);

	TRACE(("idn_nameprep_map(ctx=%s, from=\"%s\")\n",
	       handle->version, idn__debug_ucs4xstring(from, 50)));

	while (*from != '\0') {
		PRUint32 v = *from;
		const char *mapped;

		if (v > UCS_MAX) {
			/* This cannot happen, but just in case.. */
			return (idn_invalid_codepoint);
		} else if (v > UNICODE_MAX) {
			/* No mapping is possible. */
			mapped = NULL;
		} else {
			/* Try mapping. */
			mapped = (*handle->map_proc)(v);
		}

		if (mapped == NULL) {
			/* No mapping. Just copy verbatim. */
			if (tolen < 1)
				return (idn_buffer_overflow);
			*to++ = v;
			tolen--;
		} else {
			const unsigned char *mappeddata;
			size_t mappedlen;

			/*
			 * Read the length through unsigned char so that a
			 * value above 127 is not sign-extended where plain
			 * char is signed.
			 */
			mappeddata = (const unsigned char *)mapped + 1;
			mappedlen = *(const unsigned char *)mapped;

			if (tolen < (mappedlen + 3) / 4)
				return (idn_buffer_overflow);
			tolen -= (mappedlen + 3) / 4;

			/*
			 * Widen each byte before shifting: an unsigned char
			 * is promoted to int, and shifting a value >= 0x80
			 * left by 24 would overflow a signed int.
			 */
			while (mappedlen >= 4) {
				*to  = (PRUint32)mappeddata[0];
				*to |= (PRUint32)mappeddata[1] << 8;
				*to |= (PRUint32)mappeddata[2] << 16;
				*to |= (PRUint32)mappeddata[3] << 24;
				mappeddata += 4;
				mappedlen -= 4;
				to++;
			}
			if (mappedlen > 0) {
				/* Trailing group of one to three bytes. */
				*to = (PRUint32)mappeddata[0];
				if (mappedlen >= 2)
					*to |= (PRUint32)mappeddata[1] << 8;
				if (mappedlen >= 3)
					*to |= (PRUint32)mappeddata[2] << 16;
				to++;
			}
		}
		from++;
	}
	if (tolen == 0)
		return (idn_buffer_overflow);
	*to = '\0';
	return (idn_success);
}
/*
 * Decode a RACE label 'from' into the UCS4 buffer 'to' ('tolen'
 * elements including the terminating NUL).
 *
 * An empty input yields an empty output.  Any other input must start
 * with IDN_RACE_PREFIX, otherwise idn_invalid_encoding is returned.
 * The remainder is decoded and decompressed by
 * race_decode_decompress() into a temporary UTF-16 buffer, which is
 * then converted to UCS4.
 *
 * Returns idn_nomemory when the temporary buffer cannot be allocated,
 * or the error of the failing decoding step.  'privdata' is not used.
 */
idn_result_t
idn__race_decode(idn_converter_t ctx, void *privdata,
		 const char *from, unsigned long *to, size_t tolen) {
	unsigned short *buf = NULL;
	size_t prefixlen = strlen(IDN_RACE_PREFIX);
	size_t fromlen;
	size_t buflen;
	idn_result_t r;

	assert(ctx != NULL);

	TRACE(("idn__race_decode(from=\"%s\", tolen=%d)\n",
	       idn__debug_xstring(from, 50), (int)tolen));

	if (!idn__util_asciihaveaceprefix(from, IDN_RACE_PREFIX)) {
		if (*from == '\0') {
			r = idn_ucs4_utf8toucs4(from, to, tolen);
			goto ret;
		}
		r = idn_invalid_encoding;
		goto ret;
	}
	from += prefixlen;
	fromlen = strlen(from);

	/*
	 * Allocate sufficient buffer.
	 */
	buflen = fromlen + 1;
	buf = malloc(sizeof(*buf) * buflen);
	if (buf == NULL) {
		r = idn_nomemory;
		goto ret;
	}

	/*
	 * Decode base32 and decompress.
	 */
	r = race_decode_decompress(from, buf, buflen);
	if (r != idn_success)
		goto ret;

	/*
	 * Now 'buf' points the decompressed string, which must contain
	 * UTF-16 characters.
	 */

	/*
	 * Convert to UCS4.
	 */
	r = idn_ucs4_utf16toucs4(buf, to, tolen);

ret:
	free(buf);
	if (r == idn_success) {
		/* The trace used to misspell "success". */
		TRACE(("idn__race_decode(): success (to=\"%s\")\n",
		       idn__debug_ucs4xstring(to, 50)));
	} else {
		TRACE(("idn__race_decode(): %s\n", idn_result_tostring(r)));
	}
	return (r);
}
/*
 * Decode the domain name 'from' according to 'actions' and store the
 * result in 'to' (at most 'tolen' bytes).
 *
 * Outline, as implemented below:
 *   1. Reject action bits outside DECODE_MASK (idn_invalid_action).
 *   2. If the resolver is disabled or no action is requested, copy the
 *      name verbatim.
 *   3. Convert the name to UCS4, optionally map delimiters, and split
 *      it into labels.
 *   4. Walk the labels from the tail towards the head, applying the
 *      requested mapping, normalization, checks, IDN decoding and
 *      round-trip check.  A label that fails a check with
 *      idn_prohibited / idn_invalid_encoding is reverted with
 *      labellist_undo() and processing continues with the next label.
 *   5. Join the labels and convert the name to the local encoding (or
 *      UTF-8 when IDN_LOCALCONV is not set).
 *
 * All resources acquired along the way are released at 'ret'.
 */
idn_result_t
idn_res_decodename(idn_resconf_t ctx, idn_action_t actions, const char *from,
		   char *to, size_t tolen) {
	idn_converter_t local_converter = NULL;
	idn_converter_t idn_converter = NULL;
	idn_delimitermap_t delimiter_mapper;
	idn_result_t r;
	labellist_t labels = NULL, l;
	unsigned long *buffer = NULL;
	unsigned long *saved_name = NULL;	/* label as it was before IDN decoding */
	size_t buffer_length;
	int idn_is_ace;

	assert(ctx != NULL && from != NULL && to != NULL);

	TRACE(("idn_res_decodename(actions=%s, from=\"%s\", tolen=%d)\n",
	       idn__res_actionstostring(actions),
	       idn__debug_xstring(from, 50), (int)tolen));

	if (actions & ~DECODE_MASK) {
		WARNING(("idn_res_decodename: invalid actions 0x%x\n",
			 actions));
		r = idn_invalid_action;
		goto ret;
	}

	if (!initialized)
		idn_res_initialize();
	if (!enabled || actions == 0) {
		/* Decoding is switched off or nothing was requested. */
		r = copy_verbatim(from, to, tolen);
		goto ret;
	} else if (tolen <= 0) {
		r = idn_buffer_overflow;
		goto ret;
	}

	/* IDN_DECODE_QUERY is shorthand for the action set below. */
	if (actions & IDN_DECODE_QUERY) {
#ifndef WITHOUT_ICONV
		actions |= (IDN_DELIMMAP | IDN_MAP | IDN_NORMALIZE | \
			    IDN_PROHCHECK | IDN_BIDICHECK | IDN_IDNCONV | \
			    IDN_RTCHECK | IDN_LOCALCONV);
#else
		actions |= (IDN_DELIMMAP | IDN_MAP | IDN_NORMALIZE | \
			    IDN_PROHCHECK | IDN_BIDICHECK | IDN_IDNCONV | \
			    IDN_RTCHECK);
#endif
	}

	/*
	 * Convert `from' to UCS4.
	 */
	local_converter = idn_resconf_getlocalconverter(ctx);
#ifndef WITHOUT_ICONV
	if (local_converter == NULL) {
		r = idn_invalid_name;
		goto ret;
	}
#endif

	idn_converter = idn_resconf_getidnconverter(ctx);
	if (idn_converter != NULL &&
	    idn_converter_isasciicompatible(idn_converter))
		idn_is_ace = 1;
	else
		idn_is_ace = 0;

	/* Initial guess for the UCS4 buffer; doubled on overflow below. */
	buffer_length = tolen * 2;

	TRACE(("res idndecode(name=\"%s\")\n", idn__debug_xstring(from, 50)));

	for (;;) {
		void *new_buffer;

		new_buffer = realloc(buffer, sizeof(*buffer) * buffer_length);
		if (new_buffer == NULL) {
			r = idn_nomemory;
			goto ret;
		}
		buffer = (unsigned long *)new_buffer;

		/*
		 * A non-ACE IDN encoding is converted as a whole here;
		 * otherwise the input is read as UTF-8 and any IDN decoding
		 * happens per label further down.
		 */
		if ((actions & IDN_IDNCONV) &&
		     idn_converter != NULL && !idn_is_ace) {
			r = idn_converter_convtoucs4(idn_converter, from,
						     buffer, buffer_length);
		} else {
			r = idn_ucs4_utf8toucs4(from, buffer, buffer_length);
		}
		if (r == idn_success)
			break;
		else if (r != idn_buffer_overflow)
			goto ret;

		buffer_length *= 2;
	}

	/* An empty name decodes to an empty name. */
	if (*buffer == '\0') {
		if (tolen <= 0) {
			r = idn_buffer_overflow;
			goto ret;
		}
		*to = '\0';
		r = idn_success;
		goto ret;
	}

	/*
	 * Delimiter map.
	 */
	if (actions & IDN_DELIMMAP) {
		TRACE(("res delimitermap(name=\"%s\")\n",
		       idn__debug_ucs4xstring(buffer, 50)));

		delimiter_mapper = idn_resconf_getdelimitermap(ctx);
		if (delimiter_mapper != NULL) {
			/* The mapping is done in place on 'buffer'. */
			r = idn_delimitermap_map(delimiter_mapper, buffer,
						 buffer, buffer_length);
			idn_delimitermap_destroy(delimiter_mapper);
			if (r != idn_success)
				goto ret;
		}
		TRACE(("res delimitermap(): success (name=\"%s\")\n",
		       idn__debug_ucs4xstring(buffer, 50)));
	}

	/*
	 * Split the name into a list of labels.
	 */
	r = labellist_create(buffer, &labels);
	if (r != idn_success)
		goto ret;

	/*
	 * Perform conversions and tests.
	 */
	for (l = labellist_tail(labels); l != NULL;
	     l = labellist_previous(l)) {

		/* Drop the copy kept for the previous label, if any. */
		free(saved_name);
		saved_name = NULL;

		/* Mapping and checks are applied to non-ASCII labels only. */
		if (!idn__util_ucs4isasciirange(labellist_getname(l))) {
			if (actions & IDN_MAP) {
				r = label_map(ctx, l);
				if (r != idn_success)
					goto ret;
			}
			if (actions & IDN_NORMALIZE) {
				r = label_normalize(ctx, l);
				if (r != idn_success)
					goto ret;
			}
			if (actions & IDN_PROHCHECK) {
				r = label_prohcheck(ctx, l);
				if (r == idn_prohibited) {
					labellist_undo(l);
					continue;
				} else if (r != idn_success) {
					goto ret;
				}
			}
			if (actions & IDN_UNASCHECK) {
				r = label_unascheck(ctx, l);
				if (r == idn_prohibited) {
					labellist_undo(l);
					continue;
				} else if (r != idn_success) {
					goto ret;
				}
			}
			if (actions & IDN_BIDICHECK) {
				r = label_bidicheck(ctx, l);
				if (r == idn_prohibited) {
					labellist_undo(l);
					continue;
				} else if (r != idn_success) {
					goto ret;
				}
			}
		}

		if ((actions & IDN_IDNCONV) && idn_is_ace) {
			/* Keep the pre-decoding name for the round-trip check. */
			saved_name = idn_ucs4_strdup(labellist_getname(l));
			if (saved_name == NULL) {
				r = idn_nomemory;
				goto ret;
			}
			r = label_idndecode(ctx, l);
			if (r == idn_invalid_encoding) {
				labellist_undo(l);
				continue;
			} else if (r != idn_success) {
				goto ret;
			}
		}
		/* 'saved_name' is only set when the label was IDN-decoded above. */
		if ((actions & IDN_RTCHECK) && saved_name != NULL) {
			r = label_rtcheck(ctx, actions, l, saved_name);
			if (r == idn_invalid_encoding) {
				labellist_undo(l);
				continue;
			} else if (r != idn_success) {
				goto ret;
			}
		}

#ifndef WITHOUT_ICONV
		if (actions & IDN_LOCALCONV) {
			r = label_localdecodecheck(ctx, l);
			if (r != idn_success)
				goto ret;
		}
#endif
	}

	/*
	 * Concat a list of labels to a name.
	 */
	for (;;) {
		void *new_buffer;

		new_buffer = realloc(buffer, sizeof(*buffer) * buffer_length);
		if (new_buffer == NULL) {
			r = idn_nomemory;
			goto ret;
		}
		buffer = (unsigned long *)new_buffer;

		r = labellist_getnamelist(labels, buffer, buffer_length);
		if (r == idn_success)
			break;
		else if (r != idn_buffer_overflow)
			goto ret;

		buffer_length *= 2;
	}

	/*
	 * NOTE(review): unlike the per-label check above, this use of
	 * 'local_converter' is not guarded by WITHOUT_ICONV; with that
	 * macro defined the converter may be NULL here if the caller
	 * passes IDN_LOCALCONV explicitly -- confirm this is intended.
	 */
	if (actions & IDN_LOCALCONV) {
		r = idn_converter_convfromucs4(local_converter, buffer,
					       to, tolen);
	} else {
		r = idn_ucs4_ucs4toutf8(buffer, to, tolen);
	}

ret:
	if (r == idn_success) {
		TRACE(("idn_res_decodename(): success (to=\"%s\")\n",
		       idn__debug_xstring(to, 50)));
	} else {
		TRACE(("idn_res_decodename(): %s\n", idn_result_tostring(r)));
	}
	/* Release everything acquired above; unset items are NULL. */
	free(saved_name);
	free(buffer);
	if (local_converter != NULL)
		idn_converter_destroy(local_converter);
	if (idn_converter != NULL)
		idn_converter_destroy(idn_converter);
	if (labels != NULL)
		labellist_destroy(labels);
	return (r);
}
/*
 * Round-trip check for a label.
 *
 * A scratch label list is created from the current name of `label'.
 * The scratch label is then run through map / normalize / prohibit
 * check / (optional) unassigned check / bidi check when its name is
 * not in the ASCII range, the optional ASCII check, ACE encoding (again
 * only when the name is still not in the ASCII range) and the ACE
 * length check.  The resulting name is compared with `original_name'
 * using idn_ucs4_strcasecmp().
 *
 * ctx           - resolver configuration handed to every step.
 * actions       - only IDN_UNASCHECK and IDN_ASCCHECK are consulted here.
 * label         - label whose current name is checked; this function
 *                 itself only reads its name.
 * original_name - name the re-encoded result must match.
 *
 * Returns idn_success when the names match, idn_nomemory when a step
 * reported it, and idn_invalid_encoding for every other failure.
 */
static idn_result_t
label_rtcheck(idn_resconf_t ctx, idn_action_t actions, labellist_t label,
	      const unsigned long *original_name)
{
	labellist_t scratch = NULL;
	const unsigned long *current;
	const unsigned long *reencoded;
	idn_result_t r;

	current = labellist_getname(label);
	TRACE(("res rtcheck(label=\"%s\", org_label=\"%s\")\n",
	       idn__debug_ucs4xstring(current, 50),
	       idn__debug_ucs4xstring(original_name, 50)));

	/* Single-pass block: `break' jumps to the common epilogue below. */
	do {
		r = labellist_create(current, &scratch);
		if (r != idn_success)
			break;

		if (scratch == NULL) {
			/*
			 * No label came back; that is acceptable only
			 * when the original name is empty as well.
			 */
			r = (*original_name == '\0')
			    ? idn_success : idn_invalid_encoding;
			break;
		}

		if (!idn__util_ucs4isasciirange(labellist_getname(scratch))) {
			r = label_map(ctx, scratch);
			if (r != idn_success)
				break;

			r = label_normalize(ctx, scratch);
			if (r != idn_success)
				break;

			r = label_prohcheck(ctx, scratch);
			if (r != idn_success)
				break;

			if (actions & IDN_UNASCHECK) {
				r = label_unascheck(ctx, scratch);
				if (r != idn_success)
					break;
			}

			r = label_bidicheck(ctx, scratch);
			if (r != idn_success)
				break;
		}

		if (actions & IDN_ASCCHECK) {
			r = label_asccheck(ctx, scratch);
			if (r != idn_success)
				break;
		}

		/*
		 * Tested a second time on purpose: the steps above may
		 * have replaced the name of the scratch label.
		 */
		if (!idn__util_ucs4isasciirange(labellist_getname(scratch))) {
			r = label_idnencode_ace(ctx, scratch);
			if (r != idn_success)
				break;
		}

		r = label_lencheck_ace(ctx, scratch);
		if (r != idn_success)
			break;

		reencoded = labellist_getname(scratch);
		if (idn_ucs4_strcasecmp(reencoded, original_name) != 0) {
			TRACE(("res rtcheck(): round trip failed, org =\"%s\", rt=\"%s\"\n",
			       idn__debug_ucs4xstring(original_name, 50),
			       idn__debug_ucs4xstring(reencoded, 50)));
			r = idn_invalid_encoding;
			break;
		}

		r = idn_success;
	} while (0);

	/* Everything except success and out-of-memory is folded into one code. */
	if (!(r == idn_success || r == idn_nomemory))
		r = idn_invalid_encoding;

	TRACE(("res rtcheck(): %s\n", idn_result_tostring(r)));

	if (scratch != NULL)
		labellist_destroy(scratch);
	return (r);
}
/*
 * Check the bidirectional character sequence of `str'.
 *
 * Each code point is classified with handle->biditype_proc.  The scan
 * stops at the first code point for which one of these holds:
 *
 *   - it is greater than UNICODE_MAX (but not greater than UCS_MAX);
 *   - it is of type idn_biditype_l and a code point of type
 *     idn_biditype_r_al has been recorded earlier;
 *   - it is of type idn_biditype_r_al while the first code point of
 *     the string is not.
 *
 * In addition, if an idn_biditype_r_al code point has been recorded,
 * the last code point of the string must be of that type too.
 *
 * handle - nameprep handle; supplies `version' for tracing and the
 *          `biditype_proc' classifier.
 * str    - NUL-terminated string of code points to examine.
 * found  - receives a pointer to the offending code point, or NULL
 *          when the string passes (an empty string always passes).
 *
 * Returns idn_success whenever `*found' has been written.  Returns
 * idn_invalid_codepoint, leaving `*found' untouched, when a code point
 * greater than UCS_MAX is met.
 */
idn_result_t
idn_nameprep_isvalidbidi(idn_nameprep_t handle, const PRUint32 *str,
			 const PRUint32 **found)
{
	const PRUint32 *cursor;
	idn_biditype_t head_type;
	idn_biditype_t tail_type;
	int seen_r_al;

	assert(handle != NULL && str != NULL && found != NULL);

	TRACE(("idn_nameprep_isvalidbidi(ctx=%s, str=\"%s\")\n",
	       handle->version, idn__debug_ucs4xstring(str, 50)));

	if (*str == '\0') {
		*found = NULL;
		return (idn_success);
	}

	/*
	 * Classify the first code point; its type drives the checks
	 * applied to the rest of the string.
	 */
	if (*str > UCS_MAX) {
		/* Not expected to happen; rejected defensively. */
		return (idn_invalid_codepoint);
	}
	if (*str > UNICODE_MAX) {
		*found = str;
		return (idn_success);
	}

	head_type = (*(handle->biditype_proc))(*str);
	tail_type = head_type;
	seen_r_al = (head_type == idn_biditype_r_al);

	/*
	 * Walk the remaining code points.
	 */
	for (cursor = str + 1; *cursor != '\0'; cursor++) {
		PRUint32 code = *cursor;

		if (code > UCS_MAX) {
			/* Not expected to happen; rejected defensively. */
			return (idn_invalid_codepoint);
		}
		if (code > UNICODE_MAX) {
			*found = cursor;
			return (idn_success);
		}

		tail_type = (*(handle->biditype_proc))(code);

		if ((seen_r_al && tail_type == idn_biditype_l) ||
		    (head_type != idn_biditype_r_al &&
		     tail_type == idn_biditype_r_al)) {
			*found = cursor;
			return (idn_success);
		}

		if (tail_type == idn_biditype_r_al)
			seen_r_al = 1;
	}

	/*
	 * `cursor' now rests on the terminator, so `cursor - 1' is the
	 * last code point of the string.
	 */
	if (seen_r_al && tail_type != idn_biditype_r_al)
		*found = cursor - 1;
	else
		*found = NULL;

	return (idn_success);
}
/*
 * Replace the name of `label' with its IDN-converter encoded form.
 *
 * The current name is passed through the converter returned by
 * idn_resconf_getidnconverter(ctx) into a byte buffer, that byte string
 * is converted back to UCS4 with idn_ucs4_utf8toucs4(), and the result
 * is stored with labellist_setname().  Both work buffers start from an
 * estimate and are doubled for as long as the conversion reports
 * idn_buffer_overflow.
 *
 * ctx   - resolver configuration supplying the IDN converter.
 * label - label to convert; its name is replaced on success.
 *
 * Returns idn_success (also when `ctx' has no IDN converter, in which
 * case the label is left untouched), idn_nomemory when a buffer cannot
 * be allocated or its size would no longer fit in size_t, or whatever
 * error the converter, idn_ucs4_utf8toucs4() or labellist_setname()
 * reported.
 */
static idn_result_t
label_idnencode_ace(idn_resconf_t ctx, labellist_t label)
{
	/* Largest size_t value; spelled this way to avoid needing SIZE_MAX. */
	const size_t size_limit = (size_t)-1;
	idn_converter_t idn_converter = NULL;
	const unsigned long *from;
	char *ascii_to = NULL;
	unsigned long *to = NULL;
	size_t to_length;
	idn_result_t r;

	from = labellist_getname(label);
	TRACE(("res ucs4toidn(label=\"%s\")\n",
	       idn__debug_ucs4xstring(from, 50)));

	idn_converter = idn_resconf_getidnconverter(ctx);
	if (idn_converter == NULL) {
		r = idn_success;
		goto ret;
	}

	to_length = idn_ucs4_strlen(from) * 4 + 16;	/* add margin */

	/*
	 * UCS4 -> converter output, growing the byte buffer on overflow.
	 */
	for (;;) {
		char *new_buffer;

		new_buffer = (char *) realloc(ascii_to, to_length);
		if (new_buffer == NULL) {
			r = idn_nomemory;
			goto ret;
		}
		ascii_to = new_buffer;

		r = idn_converter_convfromucs4(idn_converter, from, ascii_to,
					       to_length);
		if (r == idn_success)
			break;
		else if (r != idn_buffer_overflow)
			goto ret;

		/* Doubling must not wrap around to a smaller size. */
		if (to_length > size_limit / 2) {
			r = idn_nomemory;
			goto ret;
		}
		to_length *= 2;
	}

	/*
	 * Converter output -> UCS4, so that it can be stored as the name
	 * of the label.  `to_length' carries over from the loop above and
	 * now counts elements of `to' instead of bytes.
	 */
	for (;;) {
		unsigned long *new_buffer;

		/* The byte count handed to realloc() must not wrap. */
		if (to_length > size_limit / sizeof(*to)) {
			r = idn_nomemory;
			goto ret;
		}
		new_buffer = (unsigned long *)
			realloc(to, sizeof(*to) * to_length);
		if (new_buffer == NULL) {
			r = idn_nomemory;
			goto ret;
		}
		to = new_buffer;

		r = idn_ucs4_utf8toucs4(ascii_to, to, to_length);
		if (r == idn_success)
			break;
		else if (r != idn_buffer_overflow)
			goto ret;

		/* Doubling must not wrap around to a smaller size. */
		if (to_length > size_limit / 2) {
			r = idn_nomemory;
			goto ret;
		}
		to_length *= 2;
	}

	/* The loop above is left only on success. */
	r = labellist_setname(label, to);

ret:
	if (r == idn_success) {
		TRACE(("res ucs4toidn(): success (label=\"%s\")\n",
		       idn__debug_ucs4xstring(labellist_getname(label), 50)));
	} else {
		TRACE(("res ucs4toidn(): %s\n", idn_result_tostring(r)));
	}
	if (idn_converter != NULL)
		idn_converter_destroy(idn_converter);
	/*
	 * Both work buffers are released on every path, including after a
	 * successful labellist_setname() -- presumably it keeps its own
	 * copy of `to'; confirm against labellist_setname().
	 */
	free(to);
	free(ascii_to);
	return (r);
}