Esempio n. 1
0
File: psl2c.c Progetto: cicku/libpsl
static void _add_punycode_if_needed(_psl_vector_t *v)
{
	int it, n;

	/* do not use 'it < v->cur' since v->cur is changed by _vector_add() ! */
	for (it = 0, n = v->cur; it < n; it++) {
		_psl_entry_t *e = _vector_get(v, it);

		if (_str_needs_encoding(e->label_buf)) {
			_psl_entry_t suffix, *suffixp;
			char lookupname[64] = "";

			/* this is much slower than the libidn2 API but should have no license issues */
			FILE *pp;
			char cmd[16 + sizeof(e->label_buf)];
			snprintf(cmd, sizeof(cmd), "idn2 '%s'", e->label_buf);
			if ((pp = popen(cmd, "r"))) {
				if (fscanf(pp, "%63s", lookupname) >= 1 && strcmp(e->label_buf, lookupname)) {
					/* fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, lookupname); */
					_suffix_init(&suffix, lookupname, strlen(lookupname));
					suffix.wildcard = e->wildcard;
					suffixp = _vector_get(v, _vector_add(v, &suffix));
					suffixp->label = suffixp->label_buf; /* set label to changed address */
				}
				pclose(pp);
			} else
				fprintf(stderr, "Failed to call popen(%s, \"r\")\n", cmd);
		}
	}

	_vector_sort(v);
}
Esempio n. 2
0
File: psl.c Progetto: jcajka/libpsl
static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e)
{
	char *lookupname = NULL;
	int rc;
	uint8_t *lower, resbuf[256];
	size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */

	if (_str_is_ascii(e->label_buf))
		return;

	/* we need a conversion to lowercase */
	lower = u8_tolower((uint8_t *)e->label_buf, u8_strlen((uint8_t *)e->label_buf), 0, UNINORM_NFKC, resbuf, &len);
	if (!lower) {
		/* fprintf(stderr, "u8_tolower(%s) failed (%d)\n", e->label_buf, errno); */
		return;
	}

	/* u8_tolower() does not terminate the result string */
	if (lower == resbuf) {
		lower[len]=0;
	} else {
		uint8_t *tmp = lower;
		lower = (uint8_t *)strndup((char *)lower, len);
		free(tmp);
	}

	if ((rc = idn2_lookup_u8(lower, (uint8_t **)&lookupname, 0)) == IDN2_OK) {
		if (strcmp(e->label_buf, lookupname)) {
			_psl_entry_t suffix, *suffixp;

			/* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */
			_suffix_init(&suffix, lookupname, strlen(lookupname));
			suffix.wildcard = e->wildcard;
			suffixp = _vector_get(v, _vector_add(v, &suffix));
			suffixp->label = suffixp->label_buf; /* set label to changed address */
		} /* else ignore */
	} /* else
		fprintf(stderr, "toASCII(%s) failed (%d): %s\n", lower, rc, idn2_strerror(rc)); */

	if (lower != resbuf)
		free(lower);
}
Esempio n. 3
0
File: psl.c Progetto: jcajka/libpsl
static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e)
{
	char *lookupname = NULL;
	int rc;

	if (_str_is_ascii(e->label_buf))
		return;

	/* idna_to_ascii_8z() automatically converts UTF-8 to lowercase */

	if ((rc = idna_to_ascii_8z(e->label_buf, &lookupname, IDNA_USE_STD3_ASCII_RULES)) == IDNA_SUCCESS) {
		if (strcmp(e->label_buf, lookupname)) {
			_psl_entry_t suffix, *suffixp;

			/* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */
			_suffix_init(&suffix, lookupname, strlen(lookupname));
			suffix.wildcard = e->wildcard;
			suffixp = _vector_get(v, _vector_add(v, &suffix));
			suffixp->label = suffixp->label_buf; /* set label to changed address */
		} /* else ignore */
	} /* else
		fprintf(_(stderr, "toASCII failed (%d): %s\n"), rc, idna_strerror(rc)); */
}
Esempio n. 4
0
File: psl.c Progetto: jcajka/libpsl
static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t *e)
{
	if (_str_is_ascii(e->label_buf))
		return;

	/* IDNA2008 UTS#46 punycode conversion */
	if (idna) {
		char lookupname[128] = "";
		UErrorCode status = 0;
		UIDNAInfo info = UIDNA_INFO_INITIALIZER;
		UChar utf16_dst[128], utf16_src[128];
		int32_t utf16_src_length;

		u_strFromUTF8(utf16_src, sizeof(utf16_src)/sizeof(utf16_src[0]), &utf16_src_length, e->label_buf, -1, &status);
		if (U_SUCCESS(status)) {
			int32_t dst_length = uidna_nameToASCII(idna, utf16_src, utf16_src_length, utf16_dst, sizeof(utf16_dst)/sizeof(utf16_dst[0]), &info, &status);
			if (U_SUCCESS(status)) {
				u_strToUTF8(lookupname, sizeof(lookupname), NULL, utf16_dst, dst_length, &status);
				if (U_SUCCESS(status)) {
					if (strcmp(e->label_buf, lookupname)) {
						_psl_entry_t suffix, *suffixp;

						/* fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */
						_suffix_init(&suffix, lookupname, strlen(lookupname));
						suffix.wildcard = e->wildcard;
						suffixp = _vector_get(v, _vector_add(v, &suffix));
						suffixp->label = suffixp->label_buf; /* set label to changed address */
					} /* else ignore */
				} /* else
					fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */
			} /* else
				fprintf(stderr, "Failed to convert to ASCII (status %d)\n", status); */
		} /* else
			fprintf(stderr, "Failed to convert UTF-8 to UTF-16 (status %d)\n", status); */
	}
}
Esempio n. 5
0
File: psl.c Progetto: jcajka/libpsl
/**
 * psl_load_fp:
 * @fp: FILE pointer
 *
 * This function loads the public suffixes from a FILE pointer.
 * To free the allocated resources, call psl_free().
 *
 * The suffixes are expected to be lowercase UTF-8 encoded if they are international.
 *
 * Returns: Pointer to a PSL context or %NULL on failure.
 *
 * Since: 0.1
 */
psl_ctx_t *psl_load_fp(FILE *fp)
{
	psl_ctx_t *psl;
	_psl_entry_t suffix, *suffixp;
	char buf[256], *linep, *p;
#ifdef WITH_LIBICU
	UIDNA *idna;
	UErrorCode status = 0;
#endif

	if (!fp)
		return NULL;

	if (!(psl = calloc(1, sizeof(psl_ctx_t))))
		return NULL;

#ifdef WITH_LIBICU
	idna = uidna_openUTS46(UIDNA_USE_STD3_RULES, &status);
#endif

	/*
	 *  as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
	 *  as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
	 */
	psl->suffixes = _vector_alloc(8*1024, _suffix_compare);
	psl->suffix_exceptions = _vector_alloc(64, _suffix_compare);

	while ((linep = fgets(buf, sizeof(buf), fp))) {
		while (isspace(*linep)) linep++; /* ignore leading whitespace */
		if (!*linep) continue; /* skip empty lines */

		if (*linep == '/' && linep[1] == '/')
			continue; /* skip comments */

		/* parse suffix rule */
		for (p = linep; *linep && !isspace(*linep);) linep++;
		*linep = 0;

		if (*p == '!') {
			/* add to exceptions */
			if (_suffix_init(&suffix, p + 1, linep - p - 1) == 0) {
				suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix));
				suffixp->label = suffixp->label_buf; /* set label to changed address */
#ifdef WITH_LIBICU
				_add_punycode_if_needed(idna, psl->suffix_exceptions, suffixp);
#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
				_add_punycode_if_needed(psl->suffix_exceptions, suffixp);
#endif
			}
		} else {
			/* add to suffixes */
			if (_suffix_init(&suffix, p, linep - p) == 0) {
				suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix));
				suffixp->label = suffixp->label_buf; /* set label to changed address */
#ifdef WITH_LIBICU
				_add_punycode_if_needed(idna, psl->suffixes, suffixp);
#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
				_add_punycode_if_needed(psl->suffixes, suffixp);
#endif
			}
		}
	}

	_vector_sort(psl->suffix_exceptions);
	_vector_sort(psl->suffixes);

#ifdef WITH_LIBICU
	if (idna)
		uidna_close(idna);
#endif

	return psl;
}