/*
Convert a UTF-8 domain name to its ASCII-compatible (A-label) form.

Two build variants exist:
  - SUPPORT_I18N_2008: plain-ASCII input (per string_is_utf8()) is returned
    as a copy without modification; otherwise ASCII uppercase letters are
    lowercased on a copy and the result is passed to idn2_lookup_u8().
  - otherwise: the input is NFKC-normalised with
    stringprep_utf8_nfkc_normalize() and converted by idna_to_ascii_8z().

Arguments:
  utf8      NUL-terminated domain string to convert
  err       if non-NULL, receives an error string when conversion fails

Returns:    a string_copy() of the converted name, or NULL on failure
*/
uschar *
string_domain_utf8_to_alabel(const uschar * utf8, uschar ** err)
{
uschar * s1, * s;
int rc;

#ifdef SUPPORT_I18N_2008
/* Avoid lowercasing plain-ascii domains */
if (!string_is_utf8(utf8))
  return string_copy(utf8);

/* Only lowercase is accepted by the library call.  A pity since we lose
any mixed-case annotation.  This does not really matter for a domain. */
  {
  uschar c;

  /* Scan for the first ASCII uppercase letter.  Only if one is found do we
  take a writable copy and lowercase from that offset onward; otherwise the
  caller's buffer is used as-is. */
  for (s1 = s = US utf8; (c = *s1); s1++)
    if (!(c & 0x80) && isupper(c))
      {
      s = string_copy(utf8);
      for (s1 = s + (s1 - utf8); (c = *s1); s1++)
        if (!(c & 0x80) && isupper(c))
          *s1 = tolower(c);
      break;
      }
  }

if ((rc = idn2_lookup_u8((const uint8_t *) s, &s1, IDN2_NFC_INPUT)) != IDN2_OK)
  {
  if (err) *err = US idn2_strerror(rc);
  return NULL;
  }
#else
s = US stringprep_utf8_nfkc_normalize(CCS utf8, -1);

/* The normaliser can hand back NULL; passing that on to idna_to_ascii_8z()
would dereference a null pointer, so fail cleanly here instead. */
if (!s)
  {
  if (err) *err = US "UTF-8 normalisation failed";
  return NULL;
  }

if (  (rc = idna_to_ascii_8z(CCS s, CSS &s1, IDNA_ALLOW_UNASSIGNED))
   != IDNA_SUCCESS)
  {
  free(s);
  if (err) *err = US idna_strerror(rc);
  return NULL;
  }
free(s);
#endif

/* Hand back a string_copy() of the library's output and release the
library-allocated buffer. */
s = string_copy(s1);
free(s1);
return s;
}
/**
 * Convert \p src to an ASCII (IDNA) representation when it needs encoding.
 *
 * If wget_str_needs_encoding() reports that \p src needs no encoding, \p src
 * itself is returned. Otherwise the behavior depends on the build:
 *  - WITH_LIBIDN2: the string is converted with idn2_lookup_u8(); when
 *    WITH_LIBUNISTRING is also set it is first lowercased/NFKC-normalized
 *    via u8_tolower().
 *  - WITH_LIBIDN: the string is validated as UTF-8 and converted with
 *    idna_to_ascii_8z().
 *  - neither: an error is logged and no conversion takes place.
 *
 * On any failure an error is logged and \p src is returned unchanged.
 *
 * \param src string to convert
 * \return \p src itself when nothing was converted, else the buffer produced
 *         by the IDN library (a pointer different from \p src; presumably the
 *         caller is expected to release it - confirm against callers)
 */
const char *wget_str_to_ascii(const char *src)
{
	// Every build variant only acts on strings that need encoding.
	if (!wget_str_needs_encoding(src))
		return src;

#ifdef WITH_LIBIDN2
	char *ascii = NULL;
	int status;

#ifdef WITH_LIBUNISTRING
	uint8_t stackbuf[256];
	uint8_t *lowered;
	size_t lowered_len = sizeof(stackbuf) - 1; // reserve one byte for the trailing \0

	// the lookup below is fed a lowercased copy of the input
	lowered = u8_tolower((uint8_t *)src, u8_strlen((uint8_t *)src), 0, UNINORM_NFKC, stackbuf, &lowered_len);
	if (!lowered) {
		error_printf("u8_tolower(%s) failed (%d)\n", src, errno);
		return src;
	}

	// u8_tolower() leaves the result unterminated: terminate in place when
	// it used our stack buffer, otherwise swap its buffer for a
	// wget_strmemdup() copy of the first lowered_len bytes
	if (lowered != stackbuf) {
		uint8_t *unterminated = lowered;

		lowered = (uint8_t *)wget_strmemdup((char *)unterminated, lowered_len);
		xfree(unterminated);
	} else {
		lowered[lowered_len] = 0;
	}

	status = idn2_lookup_u8(lowered, (uint8_t **)&ascii, 0);
	if (status != IDN2_OK) {
		error_printf(_("toASCII(%s) failed (%d): %s\n"), lowered, status, idn2_strerror(status));
	} else {
		debug_printf("idn2 '%s' -> '%s'\n", src, ascii);
		src = ascii;
	}

	// only the heap copy needs releasing; the stack buffer does not
	if (lowered != stackbuf)
		xfree(lowered);
#else
	status = idn2_lookup_u8((uint8_t *)src, (uint8_t **)&ascii, 0);
	if (status != IDN2_OK) {
		error_printf(_("toASCII(%s) failed (%d): %s\n"), src, status, idn2_strerror(status));
	} else {
		debug_printf("idn2 '%s' -> '%s'\n", src, ascii);
		src = ascii;
	}
#endif
#elif WITH_LIBIDN
	char *ascii = NULL;
	int status;

	if (!_utf8_is_valid(src)) {
		error_printf(_("Invalid UTF-8 sequence not converted: '%s'\n"), src);
	} else if ((status = idna_to_ascii_8z(src, &ascii, IDNA_USE_STD3_ASCII_RULES)) != IDNA_SUCCESS) {
		// idna_to_ascii_8z() automatically converts UTF-8 to lowercase
		error_printf(_("toASCII failed (%d): %s\n"), status, idna_strerror(status));
	} else {
		src = ascii;
	}
#else
	error_printf(_("toASCII not available: '%s'\n"), src);
#endif

	return src;
}