/*
 * Convert a UTF-8 domain name to its ASCII-compatible (A-label) form.
 *
 * Arguments:
 *   utf8   NUL-terminated domain name
 *   err    if non-NULL, set to an error description when conversion fails
 *
 * Returns: a string_copy() of the converted name (or of the input when the
 *          2008 code path finds nothing to convert), or NULL on failure.
 */
uschar *
string_domain_utf8_to_alabel(const uschar * utf8, uschar ** err)
{
    uschar * s1, * s;
    int rc;

#ifdef SUPPORT_I18N_2008
    /* Avoid lowercasing plain-ascii domains */
    if (!string_is_utf8(utf8)) {
        return string_copy(utf8);
    }

    /* Only lowercase is accepted by the library call.  A pity since we lose
     * any mixed-case annotation.  This does not really matter for a domain.
     * The input is copied lazily: only once the first uppercase ASCII byte
     * is seen, and lowercasing then continues from that same offset. */
    {
        uschar c;

        for (s1 = s = US utf8; (c = *s1); s1++) {
            if (!(c & 0x80) && isupper(c)) {
                s = string_copy(utf8);
                for (s1 = s + (s1 - utf8); (c = *s1); s1++) {
                    if (!(c & 0x80) && isupper(c)) {
                        *s1 = tolower(c);
                    }
                }
                break;
            }
        }
    }

    if ((rc = idn2_lookup_u8((const uint8_t *) s, &s1, IDN2_NFC_INPUT)) != IDN2_OK) {
        if (err) {
            *err = US idn2_strerror(rc);
        }
        return NULL;
    }
#else
    s = US stringprep_utf8_nfkc_normalize(CCS utf8, -1);
    if (!s) {
        /* Normalisation failed (e.g. input rejected by the library); do not
         * hand a NULL pointer to idna_to_ascii_8z(). */
        if (err) {
            *err = US "UTF-8 normalisation failed";
        }
        return NULL;
    }

    if ((rc = idna_to_ascii_8z(CCS s, CSS &s1, IDNA_ALLOW_UNASSIGNED)) != IDNA_SUCCESS) {
        free(s);
        if (err) {
            *err = US idna_strerror(rc);
        }
        return NULL;
    }
    free(s);
#endif

    /* Hand back a string_copy() of the library result and release the
     * library's own allocation. */
    s = string_copy(s1);
    free(s1);
    return s;
}
/*
 * If the label of entry 'e' is not plain ASCII, convert it to lowercase
 * (NFKC), run it through idn2_lookup_u8() and, when the result differs from
 * the original label, append an additional entry holding the converted name
 * to vector 'v'.  The new entry inherits the wildcard flag of 'e'.
 *
 * All failures (case folding, allocation, IDNA conversion) are silently
 * ignored; in that case nothing is added.
 */
static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e)
{
	char *lookupname = NULL;
	int rc;
	uint8_t *lower, resbuf[256];
	size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */

	if (_str_is_ascii(e->label_buf))
		return;

	/* we need a conversion to lowercase */
	lower = u8_tolower((uint8_t *)e->label_buf, u8_strlen((uint8_t *)e->label_buf), 0, UNINORM_NFKC, resbuf, &len);
	if (!lower) {
		/* fprintf(stderr, "u8_tolower(%s) failed (%d)\n", e->label_buf, errno); */
		return;
	}

	/* u8_tolower() does not terminate the result string */
	if (lower == resbuf) {
		lower[len] = 0;
	} else {
		uint8_t *tmp = lower;

		lower = (uint8_t *)strndup((char *)lower, len);
		free(tmp);
		if (!lower)
			return; /* out of memory: nothing to convert */
	}

	rc = idn2_lookup_u8(lower, (uint8_t **)&lookupname, 0);
	if (rc == IDN2_OK && lookupname) {
		if (strcmp(e->label_buf, lookupname)) {
			_psl_entry_t suffix, *suffixp;

			/* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */
			_suffix_init(&suffix, lookupname, strlen(lookupname));
			suffix.wildcard = e->wildcard;
			suffixp = _vector_get(v, _vector_add(v, &suffix));
			if (suffixp)
				suffixp->label = suffixp->label_buf; /* set label to changed address */
		} /* else ignore */

		/* The lookup result is allocated by the library and owned by us.
		 * NOTE(review): assumes _suffix_init() copies the name into the
		 * entry's label_buf rather than keeping the pointer - confirm. */
		free(lookupname);
	} /* else
		fprintf(stderr, "toASCII(%s) failed (%d): %s\n", lower, rc, idn2_strerror(rc)); */

	if (lower != resbuf)
		free(lower);
}
/*
 * Convert a host/domain string to its ASCII (IDNA) representation.
 *
 * If wget_str_needs_encoding() says no conversion is required, or if the
 * conversion fails or is not compiled in, the argument itself is returned
 * (failures are reported via error_printf()).  Otherwise the string produced
 * by the IDN library is returned; callers can detect this case by comparing
 * the result with the pointer they passed in.
 */
const char *wget_str_to_ascii(const char *src)
{
	// Nothing to do for strings that need no encoding, whatever the backend.
	if (!wget_str_needs_encoding(src))
		return src;

#ifdef WITH_LIBIDN2
	char *encoded = NULL;
	int status;

#ifdef WITH_LIBUNISTRING
	uint8_t stackbuf[256];
	size_t folded_len = sizeof(stackbuf) - 1; // keep one byte free for the terminating \0

	// the lookup below is fed a lowercased, NFKC-normalized copy
	uint8_t *folded = u8_tolower((uint8_t *)src, u8_strlen((uint8_t *)src), 0, UNINORM_NFKC, stackbuf, &folded_len);

	if (!folded) {
		error_printf("u8_tolower(%s) failed (%d)\n", src, errno);
		return src;
	}

	// u8_tolower() hands back an unterminated string
	if (folded != stackbuf) {
		// result did not fit the stack buffer: swap it for a terminated heap copy
		uint8_t *unterminated = folded;

		folded = (uint8_t *)wget_strmemdup((char *)unterminated, folded_len);
		xfree(unterminated);
	} else {
		folded[folded_len] = 0;
	}

	status = idn2_lookup_u8(folded, (uint8_t **)&encoded, 0);
	if (status != IDN2_OK) {
		error_printf(_("toASCII(%s) failed (%d): %s\n"), folded, status, idn2_strerror(status));
	} else {
		debug_printf("idn2 '%s' -> '%s'\n", src, encoded);
		src = encoded;
	}

	if (folded != stackbuf)
		xfree(folded);
#else
	status = idn2_lookup_u8((uint8_t *)src, (uint8_t **)&encoded, 0);
	if (status != IDN2_OK) {
		error_printf(_("toASCII(%s) failed (%d): %s\n"), src, status, idn2_strerror(status));
	} else {
		debug_printf("idn2 '%s' -> '%s'\n", src, encoded);
		src = encoded;
	}
#endif

#elif WITH_LIBIDN
	char *encoded = NULL;
	int status;

	if (!_utf8_is_valid(src)) {
		error_printf(_("Invalid UTF-8 sequence not converted: '%s'\n"), src);
	} else {
		// idna_to_ascii_8z() automatically converts UTF-8 to lowercase
		status = idna_to_ascii_8z(src, &encoded, IDNA_USE_STD3_ASCII_RULES);
		if (status != IDNA_SUCCESS) {
			error_printf(_("toASCII failed (%d): %s\n"), status, idna_strerror(status));
		} else {
			src = encoded;
		}
	}
#else
	error_printf(_("toASCII not available: '%s'\n"), src);
#endif

	return src;
}
static int read_rules(FILE *fp, char const *fname, loose_trie **root) /* * populate the root node with all rules. * return number of bad lines, or -1 if out of memory */ { char buf[512]; char *s; int lineno = 0, bad = 0; while ((s = fgets(buf, sizeof buf, fp)) != NULL) { ++lineno; if (s[0] == '/' && s[1] == '/') continue; int is_ascii = 1; int ch; while ((ch = *(unsigned char *)s++) != 0) { if (ch & 0x80) // utf-8, check it is a valid sequence { int m = 0x40; is_ascii = (ch & m) != 0? 0: -1; while ((ch & m) != 0 && is_ascii == 0) { is_ascii = (*(unsigned char*)s++ & 0xc0) == 0x80? 0: -1; m >>= 1; } continue; } if (isspace(ch)) // end of rule { *--s = 0; break; } } if (ch == 0) { (*do_report)(LOG_CRIT, "Line too long at %s:%d: \"%.10s...\"", fname, lineno, buf); ++bad; while ((ch = fgetc(fp)) != '\n' && ch != EOF) ; continue; } assert(*s == 0); size_t len = s - &buf[0]; if (len == 0) // empty line continue; if (!is_ascii) { if (is_ascii < 0) { (*do_report)(LOG_CRIT, "Bad UTF-8 sequence at %s:%d: \"%s\"", fname, lineno, buf); ++bad; continue; } uint8_t norm[128]; size_t ulen = sizeof norm - 1; uint8_t* n = u8_tolower((uint8_t*)buf, len, NULL, UNINORM_NFC, norm, &ulen); if (n != &norm[0]) { (*do_report)(LOG_CRIT, "Failed u8_tolower at %s:%d: %s, len = %zu for \"%s\"", fname, lineno, strerror(errno), ulen, buf); free(n); ++bad; continue; } n[ulen] = 0; uint8_t *xn = NULL; int rtc = idn2_lookup_u8(n, &xn, 0); if (rtc != IDN2_OK || xn == NULL || (len = strlen((char*)xn)) >= sizeof buf) { (*do_report)(LOG_CRIT, "IDNA failed at %s:%d: %s for \"%s\"", fname, lineno, idn2_strerror_name(rtc), buf); ++bad; continue; } memcpy(buf, xn, len); idn2_free(xn); buf[len] = 0; } char **labels = reverse_labels(buf, len, "!*"); if (labels == NULL) { (*do_report)(LOG_CRIT, "Invalid domain at %s:%d for \"%s\"", fname, lineno, buf); ++bad; continue; } loose_trie *node = add_trie_node(root, *labels); for (size_t i = 1; labels[i] && node; ++i) node = add_trie_node(&node->child, labels[i]); 
free(labels); if (node == NULL) // out of memory return -1; node->is_terminal = 1; }