Пример #1
0
Файл: utf8.c Проект: Exim/exim
uschar *
string_domain_utf8_to_alabel(const uschar * utf8, uschar ** err)
{
uschar * s1, * s;
int rc;

#ifdef SUPPORT_I18N_2008
/* Avoid lowercasing plain-ascii domains */
if (!string_is_utf8(utf8))
  return string_copy(utf8);

/* Only lowercase is accepted by the library call.  A pity since we lose
any mixed-case annotation.  This does not really matter for a domain. */
  {
  uschar c;
  for (s1 = s = US utf8; (c = *s1); s1++) if (!(c & 0x80) && isupper(c))
    {
    s = string_copy(utf8);
    for (s1 = s + (s1 - utf8); (c = *s1); s1++) if (!(c & 0x80) && isupper(c))
      *s1 = tolower(c);
    break;
    }
  }
if ((rc = idn2_lookup_u8((const uint8_t *) s, &s1, IDN2_NFC_INPUT)) != IDN2_OK)
  {
  if (err) *err = US idn2_strerror(rc);
  return NULL;
  }
#else
s = US stringprep_utf8_nfkc_normalize(CCS utf8, -1);
if (  (rc = idna_to_ascii_8z(CCS s, CSS &s1, IDNA_ALLOW_UNASSIGNED))
   != IDNA_SUCCESS)
  {
  free(s);
  if (err) *err = US idna_strerror(rc);
  return NULL;
  }
free(s);
#endif
s = string_copy(s1);
free(s1);
return s;
}
Пример #2
0
static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e)
{
	char *lookupname = NULL;
	int rc;
	uint8_t *lower, resbuf[256];
	size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */

	if (_str_is_ascii(e->label_buf))
		return;

	/* we need a conversion to lowercase */
	lower = u8_tolower((uint8_t *)e->label_buf, u8_strlen((uint8_t *)e->label_buf), 0, UNINORM_NFKC, resbuf, &len);
	if (!lower) {
		/* fprintf(stderr, "u8_tolower(%s) failed (%d)\n", e->label_buf, errno); */
		return;
	}

	/* u8_tolower() does not terminate the result string */
	if (lower == resbuf) {
		lower[len]=0;
	} else {
		uint8_t *tmp = lower;
		lower = (uint8_t *)strndup((char *)lower, len);
		free(tmp);
	}

	if ((rc = idn2_lookup_u8(lower, (uint8_t **)&lookupname, 0)) == IDN2_OK) {
		if (strcmp(e->label_buf, lookupname)) {
			_psl_entry_t suffix, *suffixp;

			/* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */
			_suffix_init(&suffix, lookupname, strlen(lookupname));
			suffix.wildcard = e->wildcard;
			suffixp = _vector_get(v, _vector_add(v, &suffix));
			suffixp->label = suffixp->label_buf; /* set label to changed address */
		} /* else ignore */
	} /* else
		fprintf(stderr, "toASCII(%s) failed (%d): %s\n", lower, rc, idn2_strerror(rc)); */

	if (lower != resbuf)
		free(lower);
}
Пример #3
0
const char *wget_str_to_ascii(const char *src)
{
#ifdef WITH_LIBIDN2
	if (wget_str_needs_encoding(src)) {
		char *asc = NULL;
		int rc;
#ifdef WITH_LIBUNISTRING
		uint8_t *lower, resbuf[256];
		size_t len = sizeof(resbuf) - 1; // leave space for additional \0 byte

		// we need a conversion to lowercase
		lower = u8_tolower((uint8_t *)src, u8_strlen((uint8_t *)src), 0, UNINORM_NFKC, resbuf, &len);
		if (!lower) {
			error_printf("u8_tolower(%s) failed (%d)\n", src, errno);
			return src;
		}

		// u8_tolower() does not terminate the result string
		if (lower == resbuf) {
			lower[len]=0;
		} else {
			uint8_t *tmp = lower;
			lower = (uint8_t *)wget_strmemdup((char *)lower, len);
			xfree(tmp);
		}

		if ((rc = idn2_lookup_u8(lower, (uint8_t **)&asc, 0)) == IDN2_OK) {
			debug_printf("idn2 '%s' -> '%s'\n", src, asc);
			src = asc;
		} else
			error_printf(_("toASCII(%s) failed (%d): %s\n"), lower, rc, idn2_strerror(rc));

		if (lower != resbuf)
			xfree(lower);
#else
		if ((rc = idn2_lookup_u8((uint8_t *)src, (uint8_t **)&asc, 0)) == IDN2_OK) {
			debug_printf("idn2 '%s' -> '%s'\n", src, asc);
			src = asc;
		} else
			error_printf(_("toASCII(%s) failed (%d): %s\n"), src, rc, idn2_strerror(rc));
#endif
	}
#elif WITH_LIBIDN
	if (wget_str_needs_encoding(src)) {
		char *asc = NULL;
		int rc;

		if (_utf8_is_valid(src)) {
			// idna_to_ascii_8z() automatically converts UTF-8 to lowercase

			if ((rc = idna_to_ascii_8z(src, &asc, IDNA_USE_STD3_ASCII_RULES)) == IDNA_SUCCESS) {
				// debug_printf("toASCII '%s' -> '%s'\n", src, asc);
				src = asc;
			} else
				error_printf(_("toASCII failed (%d): %s\n"), rc, idna_strerror(rc));
		}
		else
			error_printf(_("Invalid UTF-8 sequence not converted: '%s'\n"), src);
	}
#else
	if (wget_str_needs_encoding(src)) {
		error_printf(_("toASCII not available: '%s'\n"), src);
	}
#endif

	return src;
}
Пример #4
0
static int read_rules(FILE *fp, char const *fname, loose_trie **root)
/*
* populate the root node with all rules.
* return number of bad lines, or -1 if out of memory
*/
{
	char buf[512];
	char *s;
	int lineno = 0, bad = 0;

	while ((s = fgets(buf, sizeof buf, fp)) != NULL)
	{
		++lineno;
		if (s[0] == '/' && s[1] == '/')
			continue;

		int is_ascii = 1;
		int ch;
		while ((ch = *(unsigned char *)s++) != 0)
		{
			if (ch & 0x80) // utf-8, check it is a valid sequence
			{
				int m = 0x40;
				is_ascii = (ch & m) != 0? 0: -1;
				while ((ch & m) != 0 && is_ascii == 0)
				{
					is_ascii = (*(unsigned char*)s++ & 0xc0) == 0x80? 0: -1;
					m >>= 1;
				}
				continue;
			}

			if (isspace(ch)) // end of rule
			{
				*--s = 0;
				break;
			}
		}

		if (ch == 0)
		{
			(*do_report)(LOG_CRIT, "Line too long at %s:%d: \"%.10s...\"",
				fname, lineno, buf);
			++bad;
			while ((ch = fgetc(fp)) != '\n' && ch != EOF)
				;
			continue;
		}

		assert(*s == 0);

		size_t len = s - &buf[0];
		if (len == 0) // empty line
			continue;

		if (!is_ascii)
		{
			if (is_ascii < 0)
			{
				(*do_report)(LOG_CRIT, "Bad UTF-8 sequence at %s:%d: \"%s\"",
					fname, lineno, buf);
				++bad;
				continue;
			}

			uint8_t norm[128];
			size_t ulen = sizeof norm - 1;
			uint8_t* n = u8_tolower((uint8_t*)buf, len, NULL, UNINORM_NFC, norm, &ulen);
			if (n != &norm[0])
			{
				(*do_report)(LOG_CRIT, "Failed u8_tolower at %s:%d: %s, len = %zu for \"%s\"",
					fname, lineno, strerror(errno), ulen, buf);
				free(n);
				++bad;
				continue;
			}

			n[ulen] = 0;

			uint8_t *xn = NULL;
			int rtc = idn2_lookup_u8(n, &xn, 0);
			if (rtc != IDN2_OK || xn == NULL || (len = strlen((char*)xn)) >= sizeof buf)
			{
				(*do_report)(LOG_CRIT, "IDNA failed at %s:%d: %s for \"%s\"",
					fname, lineno, idn2_strerror_name(rtc), buf);
				++bad;
				continue;
			}

			memcpy(buf, xn, len);
			idn2_free(xn);
			buf[len] = 0;
		}

		char **labels = reverse_labels(buf, len, "!*");
		if (labels == NULL)
		{
			(*do_report)(LOG_CRIT, "Invalid domain at %s:%d for \"%s\"",
				fname, lineno, buf);
			++bad;
			continue;
		}

		loose_trie *node = add_trie_node(root, *labels);
		for (size_t i = 1; labels[i] && node; ++i)
			node = add_trie_node(&node->child, labels[i]);

		free(labels);
		if (node == NULL) // out of memory
			return -1;

		node->is_terminal = 1;
	}