Example #1
0
File: utf8.c Project: Exim/exim
uschar *
string_domain_utf8_to_alabel(const uschar * utf8, uschar ** err)
{
uschar * s1, * s;
int rc;

#ifdef SUPPORT_I18N_2008
/* Avoid lowercasing plain-ascii domains */
if (!string_is_utf8(utf8))
  return string_copy(utf8);

/* Only lowercase is accepted by the library call.  A pity since we lose
any mixed-case annotation.  This does not really matter for a domain. */
  {
  uschar c;
  for (s1 = s = US utf8; (c = *s1); s1++) if (!(c & 0x80) && isupper(c))
    {
    s = string_copy(utf8);
    for (s1 = s + (s1 - utf8); (c = *s1); s1++) if (!(c & 0x80) && isupper(c))
      *s1 = tolower(c);
    break;
    }
  }
if ((rc = idn2_lookup_u8((const uint8_t *) s, &s1, IDN2_NFC_INPUT)) != IDN2_OK)
  {
  if (err) *err = US idn2_strerror(rc);
  return NULL;
  }
#else
s = US stringprep_utf8_nfkc_normalize(CCS utf8, -1);
if (  (rc = idna_to_ascii_8z(CCS s, CSS &s1, IDNA_ALLOW_UNASSIGNED))
   != IDNA_SUCCESS)
  {
  free(s);
  if (err) *err = US idna_strerror(rc);
  return NULL;
  }
free(s);
#endif
s = string_copy(s1);
free(s1);
return s;
}
Example #2
0
const char *wget_str_to_ascii(const char *src)
{
#ifdef WITH_LIBIDN2
	if (wget_str_needs_encoding(src)) {
		char *asc = NULL;
		int rc;
#ifdef WITH_LIBUNISTRING
		uint8_t *lower, resbuf[256];
		size_t len = sizeof(resbuf) - 1; // leave space for additional \0 byte

		// we need a conversion to lowercase
		lower = u8_tolower((uint8_t *)src, u8_strlen((uint8_t *)src), 0, UNINORM_NFKC, resbuf, &len);
		if (!lower) {
			error_printf("u8_tolower(%s) failed (%d)\n", src, errno);
			return src;
		}

		// u8_tolower() does not terminate the result string
		if (lower == resbuf) {
			lower[len]=0;
		} else {
			uint8_t *tmp = lower;
			lower = (uint8_t *)wget_strmemdup((char *)lower, len);
			xfree(tmp);
		}

		if ((rc = idn2_lookup_u8(lower, (uint8_t **)&asc, 0)) == IDN2_OK) {
			debug_printf("idn2 '%s' -> '%s'\n", src, asc);
			src = asc;
		} else
			error_printf(_("toASCII(%s) failed (%d): %s\n"), lower, rc, idn2_strerror(rc));

		if (lower != resbuf)
			xfree(lower);
#else
		if ((rc = idn2_lookup_u8((uint8_t *)src, (uint8_t **)&asc, 0)) == IDN2_OK) {
			debug_printf("idn2 '%s' -> '%s'\n", src, asc);
			src = asc;
		} else
			error_printf(_("toASCII(%s) failed (%d): %s\n"), src, rc, idn2_strerror(rc));
#endif
	}
#elif WITH_LIBIDN
	if (wget_str_needs_encoding(src)) {
		char *asc = NULL;
		int rc;

		if (_utf8_is_valid(src)) {
			// idna_to_ascii_8z() automatically converts UTF-8 to lowercase

			if ((rc = idna_to_ascii_8z(src, &asc, IDNA_USE_STD3_ASCII_RULES)) == IDNA_SUCCESS) {
				// debug_printf("toASCII '%s' -> '%s'\n", src, asc);
				src = asc;
			} else
				error_printf(_("toASCII failed (%d): %s\n"), rc, idna_strerror(rc));
		}
		else
			error_printf(_("Invalid UTF-8 sequence not converted: '%s'\n"), src);
	}
#else
	if (wget_str_needs_encoding(src)) {
		error_printf(_("toASCII not available: '%s'\n"), src);
	}
#endif

	return src;
}