示例#1
0
文件: psl2c.c 项目: cicku/libpsl
static void _add_punycode_if_needed(_psl_vector_t *v)
{
	int it, n;

	/* do not use 'it < v->cur' since v->cur is changed by _vector_add() ! */
	for (it = 0, n = v->cur; it < n; it++) {
		_psl_entry_t *e = _vector_get(v, it);

		if (_str_needs_encoding(e->label_buf)) {
			_psl_entry_t suffix, *suffixp;
			char lookupname[64] = "";

			/* this is much slower than the libidn2 API but should have no license issues */
			FILE *pp;
			char cmd[16 + sizeof(e->label_buf)];
			snprintf(cmd, sizeof(cmd), "idn2 '%s'", e->label_buf);
			if ((pp = popen(cmd, "r"))) {
				if (fscanf(pp, "%63s", lookupname) >= 1 && strcmp(e->label_buf, lookupname)) {
					/* fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, lookupname); */
					_suffix_init(&suffix, lookupname, strlen(lookupname));
					suffix.wildcard = e->wildcard;
					suffixp = _vector_get(v, _vector_add(v, &suffix));
					suffixp->label = suffixp->label_buf; /* set label to changed address */
				}
				pclose(pp);
			} else
				fprintf(stderr, "Failed to call popen(%s, \"r\")\n", cmd);
		}
	}

	_vector_sort(v);
}
示例#2
0
文件: psl2c.c 项目: remicollet/libpsl
static int _print_psl_entries_dafsa_binary(const char *fname, const _psl_vector_t *v)
{
	FILE *fp;
	int ret = 0, it, rc;
	char cmd[256];

	if ((fp = fopen("in.tmp", "w"))) {
		for (it = 0; it < v->cur; it++) {
			_psl_entry_t *e = _vector_get(v, it);
			unsigned char *s = (unsigned char *)e->label_buf;

			/* search for non-ASCII label and skip it */
			while (*s && *s < 128) s++;
			if (*s) continue;

			fprintf(fp, "%s, %X\n", e->label_buf, (int) (e->flags & 0x0F));
		}

		fclose(fp);
	} else {
		fprintf(stderr, "Failed to write open 'in.tmp'\n");
		return 3;
	}

	snprintf(cmd, sizeof(cmd), MAKE_DAFSA " --binary in.tmp %s", fname);
	if ((rc = system(cmd))) {
		fprintf(stderr, "Failed to execute '%s' (%d)\n", cmd, rc);
		ret = 2;
	}

	unlink("in.tmp");
	return ret;
}
示例#3
0
文件: psl2c.c 项目: cicku/libpsl
static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *varname)
{
	int it;

#ifdef BUILTIN_GENERATOR_LIBICU
	do {
		UVersionInfo version_info;
		char version[U_MAX_VERSION_STRING_LENGTH];

		u_getVersion(version_info);
		u_versionToString(version_info, version);
		fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libicu/%s) */\n", version);
	} while (0);
#elif defined(BUILTIN_GENERATOR_LIBIDN2)
		fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn2/%s) */\n", idn2_check_version(NULL));
#elif defined(BUILTIN_GENERATOR_LIBIDN)
		fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn/%s) */\n", stringprep_check_version(NULL));
#else
	fprintf(fpout, "/* automatically generated by psl2c (without punycode support) */\n");
#endif

	fprintf(fpout, "static _psl_entry_t %s[] = {\n", varname);

	for (it = 0; it < v->cur; it++) {
		_psl_entry_t *e = _vector_get(v, it);

		fprintf(fpout, "\t{ \"%s\", NULL, %hd, %d, %d },\n",
			e->label_buf, e->length, (int) e->nlabels, (int) e->flags);
	}

	fprintf(fpout, "};\n");
}
示例#4
0
文件: vector.c 项目: Oneiroi/haka
void *_vector_push(struct vector *v, size_t elemsize)
{
	if (v->count == v->allocated_count) {
		if (!vector_grow(v)) {
			return NULL;
		}
	}

	vector_resize(v, v->count+1);
	return _vector_get(v, elemsize, v->count-1);
}
示例#5
0
文件: psl2c.c 项目: remicollet/libpsl
static void _print_psl_entries_dafsa(FILE *fpout, const _psl_vector_t *v)
{
	FILE *fp;
	int it;

#ifdef BUILTIN_GENERATOR_LIBICU
	do {
		UVersionInfo version_info;
		char version[U_MAX_VERSION_STRING_LENGTH];

		u_getVersion(version_info);
		u_versionToString(version_info, version);
		fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libicu/%s) */\n", version);
	} while (0);
#elif defined(BUILTIN_GENERATOR_LIBIDN2)
	fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn2/%s) */\n", idn2_check_version(NULL));
#elif defined(BUILTIN_GENERATOR_LIBIDN)
	fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn/%s) */\n", stringprep_check_version(NULL));
#else
	fprintf(fpout, "/* automatically generated by psl2c (punycode generated internally) */\n");
#endif

	if ((fp = fopen("in.tmp", "w"))) {
		for (it = 0; it < v->cur; it++) {
			_psl_entry_t *e = _vector_get(v, it);
			unsigned char *s = (unsigned char *)e->label_buf;

			/* search for non-ASCII label and skip it */
			while (*s && *s < 128) s++;
			if (*s) continue;

			fprintf(fp, "%s, %X\n", e->label_buf, (int) (e->flags & 0x0F));
		}

		fclose(fp);
	}

	if ((it = system(MAKE_DAFSA " in.tmp out.tmp")))
		fprintf(stderr, "Failed to execute " MAKE_DAFSA "\n");

	if ((fp = fopen("out.tmp", "r"))) {
		char buf[256];

		while (fgets(buf, sizeof(buf), fp))
			fputs(buf, fpout);

		fclose(fp);
	}

	unlink("in.tmp");
	unlink("out.tmp");
}
示例#6
0
文件: psl.c 项目: jcajka/libpsl
static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e)
{
	char *lookupname = NULL;
	int rc;
	uint8_t *lower, resbuf[256];
	size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */

	if (_str_is_ascii(e->label_buf))
		return;

	/* we need a conversion to lowercase */
	lower = u8_tolower((uint8_t *)e->label_buf, u8_strlen((uint8_t *)e->label_buf), 0, UNINORM_NFKC, resbuf, &len);
	if (!lower) {
		/* fprintf(stderr, "u8_tolower(%s) failed (%d)\n", e->label_buf, errno); */
		return;
	}

	/* u8_tolower() does not terminate the result string */
	if (lower == resbuf) {
		lower[len]=0;
	} else {
		uint8_t *tmp = lower;
		lower = (uint8_t *)strndup((char *)lower, len);
		free(tmp);
	}

	if ((rc = idn2_lookup_u8(lower, (uint8_t **)&lookupname, 0)) == IDN2_OK) {
		if (strcmp(e->label_buf, lookupname)) {
			_psl_entry_t suffix, *suffixp;

			/* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */
			_suffix_init(&suffix, lookupname, strlen(lookupname));
			suffix.wildcard = e->wildcard;
			suffixp = _vector_get(v, _vector_add(v, &suffix));
			suffixp->label = suffixp->label_buf; /* set label to changed address */
		} /* else ignore */
	} /* else
		fprintf(stderr, "toASCII(%s) failed (%d): %s\n", lower, rc, idn2_strerror(rc)); */

	if (lower != resbuf)
		free(lower);
}
示例#7
0
文件: psl.c 项目: jcajka/libpsl
static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e)
{
	char *lookupname = NULL;
	int rc;

	if (_str_is_ascii(e->label_buf))
		return;

	/* idna_to_ascii_8z() automatically converts UTF-8 to lowercase */

	if ((rc = idna_to_ascii_8z(e->label_buf, &lookupname, IDNA_USE_STD3_ASCII_RULES)) == IDNA_SUCCESS) {
		if (strcmp(e->label_buf, lookupname)) {
			_psl_entry_t suffix, *suffixp;

			/* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */
			_suffix_init(&suffix, lookupname, strlen(lookupname));
			suffix.wildcard = e->wildcard;
			suffixp = _vector_get(v, _vector_add(v, &suffix));
			suffixp->label = suffixp->label_buf; /* set label to changed address */
		} /* else ignore */
	} /* else
		fprintf(_(stderr, "toASCII failed (%d): %s\n"), rc, idna_strerror(rc)); */
}
示例#8
0
文件: psl.c 项目: jcajka/libpsl
static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t *e)
{
	if (_str_is_ascii(e->label_buf))
		return;

	/* IDNA2008 UTS#46 punycode conversion */
	if (idna) {
		char lookupname[128] = "";
		UErrorCode status = 0;
		UIDNAInfo info = UIDNA_INFO_INITIALIZER;
		UChar utf16_dst[128], utf16_src[128];
		int32_t utf16_src_length;

		u_strFromUTF8(utf16_src, sizeof(utf16_src)/sizeof(utf16_src[0]), &utf16_src_length, e->label_buf, -1, &status);
		if (U_SUCCESS(status)) {
			int32_t dst_length = uidna_nameToASCII(idna, utf16_src, utf16_src_length, utf16_dst, sizeof(utf16_dst)/sizeof(utf16_dst[0]), &info, &status);
			if (U_SUCCESS(status)) {
				u_strToUTF8(lookupname, sizeof(lookupname), NULL, utf16_dst, dst_length, &status);
				if (U_SUCCESS(status)) {
					if (strcmp(e->label_buf, lookupname)) {
						_psl_entry_t suffix, *suffixp;

						/* fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */
						_suffix_init(&suffix, lookupname, strlen(lookupname));
						suffix.wildcard = e->wildcard;
						suffixp = _vector_get(v, _vector_add(v, &suffix));
						suffixp->label = suffixp->label_buf; /* set label to changed address */
					} /* else ignore */
				} /* else
					fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */
			} /* else
				fprintf(stderr, "Failed to convert to ASCII (status %d)\n", status); */
		} /* else
			fprintf(stderr, "Failed to convert UTF-8 to UTF-16 (status %d)\n", status); */
	}
}
示例#9
0
文件: psl2c.c 项目: remicollet/libpsl
static int _check_psl(const psl_ctx_t *psl)
{
	int it, pos, err = 0;

	/* check if plain suffix also appears in exceptions */
	for (it = 0; it < psl->suffixes->cur; it++) {
		_psl_entry_t *e = _vector_get(psl->suffixes, it);

		if (!e->wildcard && _vector_find(psl->suffix_exceptions, e) >= 0) {
			fprintf(stderr, "Found entry '%s' also in exceptions\n", e->label);
			err = 1;
		}
	}

	/* check if exception also appears in suffix list as plain entry */
	for (it = 0; it < psl->suffix_exceptions->cur; it++) {
		_psl_entry_t *e2, *e = _vector_get(psl->suffix_exceptions, it);

		if ((e2 = _vector_get(psl->suffixes, pos = _vector_find(psl->suffixes, e)))) {
			if (!e2->wildcard) {
				fprintf(stderr, "Found exception '!%s' also as suffix\n", e->label);
				err = 1;
			}
			/* Two same domains in a row are allowed: wildcard and non-wildcard.
			 * Binary search find either of them, so also check previous and next entry. */
			else if (pos > 0 && _suffix_compare(e, e2 = _vector_get(psl->suffixes, pos - 1)) == 0 && !e2->wildcard) {
				fprintf(stderr, "Found exception '!%s' also as suffix\n", e->label);
				err = 1;
			}
			else if (pos < psl->suffixes->cur - 1 && _suffix_compare(e, e2 = _vector_get(psl->suffixes, pos + 1)) == 0 && !e2->wildcard) {
				fprintf(stderr, "Found exception '!%s' also as suffix\n", e->label);
				err = 1;
			}
		}
	}

	/* check if non-wildcard entry is already covered by wildcard entry */
	for (it = 0; it < psl->suffixes->cur; it++) {
		const char *p;
		_psl_entry_t *e = _vector_get(psl->suffixes, it);

		if (e->nlabels > 1 && !e->wildcard && (p = strchr(e->label, '.'))) {
			_psl_entry_t *e2, *e3, suffix;

			suffix.label = p + 1;
			suffix.length = strlen(p + 1);
			suffix.nlabels = e->nlabels - 1;

			e2 = _vector_get(psl->suffixes, pos = _vector_find(psl->suffixes, &suffix));

			if (e2) {
				if (e2->wildcard) {
					fprintf(stderr, "Found superfluous '%s' already covered by '*.%s'\n", e->label, e2->label);
					err = 1;
				}
				/* Two same domains in a row are allowed: wildcard and non-wildcard.
				* Binary search find either of them, so also check previous and next entry. */
				else if (pos > 0 && _suffix_compare(e2, e3 = _vector_get(psl->suffixes, pos - 1)) == 0 && e3->wildcard) {
					fprintf(stderr, "Found superfluous '%s' already covered by '*.%s'\n", e->label, e2->label);
					err = 1;
				}
				else if (pos < psl->suffixes->cur - 1 && _suffix_compare(e2, e3 = _vector_get(psl->suffixes, pos + 1)) == 0 && e3->wildcard) {
					fprintf(stderr, "Found superfluous '%s' already covered by '*.%s'\n", e->label, e2->label);
					err = 1;
				}
			}
		}
	}

	return err;
}
示例#10
0
文件: vector.c 项目: 52nlp/icsiboost
extern void* vector_get(vector_t* v, size_t index) { return *(void**) _vector_get(v, index); }
示例#11
0
文件: psl.c 项目: jcajka/libpsl
/**
 * psl_load_fp:
 * @fp: FILE pointer
 *
 * This function loads the public suffixes from a FILE pointer.
 * To free the allocated resources, call psl_free().
 *
 * The suffixes are expected to be lowercase UTF-8 encoded if they are international.
 *
 * Returns: Pointer to a PSL context or %NULL on failure.
 *
 * Since: 0.1
 */
psl_ctx_t *psl_load_fp(FILE *fp)
{
	psl_ctx_t *psl;
	_psl_entry_t suffix, *suffixp;
	char buf[256], *linep, *p;
#ifdef WITH_LIBICU
	UIDNA *idna;
	UErrorCode status = 0;
#endif

	if (!fp)
		return NULL;

	if (!(psl = calloc(1, sizeof(psl_ctx_t))))
		return NULL;

#ifdef WITH_LIBICU
	idna = uidna_openUTS46(UIDNA_USE_STD3_RULES, &status);
#endif

	/*
	 *  as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
	 *  as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
	 */
	psl->suffixes = _vector_alloc(8*1024, _suffix_compare);
	psl->suffix_exceptions = _vector_alloc(64, _suffix_compare);

	while ((linep = fgets(buf, sizeof(buf), fp))) {
		while (isspace(*linep)) linep++; /* ignore leading whitespace */
		if (!*linep) continue; /* skip empty lines */

		if (*linep == '/' && linep[1] == '/')
			continue; /* skip comments */

		/* parse suffix rule */
		for (p = linep; *linep && !isspace(*linep);) linep++;
		*linep = 0;

		if (*p == '!') {
			/* add to exceptions */
			if (_suffix_init(&suffix, p + 1, linep - p - 1) == 0) {
				suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix));
				suffixp->label = suffixp->label_buf; /* set label to changed address */
#ifdef WITH_LIBICU
				_add_punycode_if_needed(idna, psl->suffix_exceptions, suffixp);
#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
				_add_punycode_if_needed(psl->suffix_exceptions, suffixp);
#endif
			}
		} else {
			/* add to suffixes */
			if (_suffix_init(&suffix, p, linep - p) == 0) {
				suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix));
				suffixp->label = suffixp->label_buf; /* set label to changed address */
#ifdef WITH_LIBICU
				_add_punycode_if_needed(idna, psl->suffixes, suffixp);
#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
				_add_punycode_if_needed(psl->suffixes, suffixp);
#endif
			}
		}
	}

	_vector_sort(psl->suffix_exceptions);
	_vector_sort(psl->suffixes);

#ifdef WITH_LIBICU
	if (idna)
		uidna_close(idna);
#endif

	return psl;
}
示例#12
0
文件: psl.c 项目: jcajka/libpsl
static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
{
	_psl_entry_t suffix, *rule;
	const char *p, *label_bak;
	unsigned short length_bak;

	/* this function should be called without leading dots, just make sure */
	suffix.label = domain + (*domain == '.');
	suffix.length = strlen(suffix.label);
	suffix.wildcard = 0;
	suffix.nlabels = 1;

	for (p = suffix.label; *p; p++)
		if (*p == '.')
			suffix.nlabels++;

	/* if domain has enough labels, it is public */
	if (psl == &_builtin_psl)
		rule = &suffixes[0];
	else
		rule = _vector_get(psl->suffixes, 0);

	if (!rule || rule->nlabels < suffix.nlabels - 1)
		return 0;

	if (psl == &_builtin_psl)
		rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare);
	else
		rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix));

	if (rule) {
		/* definitely a match, no matter if the found rule is a wildcard or not */
		return 1;
	} else if (suffix.nlabels == 1) {
		/* unknown TLD, this is the prevailing '*' match */
		return 1;
	}

	label_bak = suffix.label;
	length_bak = suffix.length;

	if ((suffix.label = strchr(suffix.label, '.'))) {
		suffix.label++;
		suffix.length = strlen(suffix.label);
		suffix.nlabels--;

		if (psl == &_builtin_psl)
			rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare);
		else
			rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix));

		if (rule) {
			if (rule->wildcard) {
				/* now that we matched a wildcard, we have to check for an exception */
				suffix.label = label_bak;
				suffix.length = length_bak;
				suffix.nlabels++;

				if (psl == &_builtin_psl) {
					if (bsearch(&suffix, suffix_exceptions, countof(suffix_exceptions), sizeof(suffix_exceptions[0]), (int(*)(const void *, const void *))_suffix_compare))
						return 0; /* found an exception, so 'domain' is not a public suffix */
				} else {
					if (_vector_get(psl->suffix_exceptions, _vector_find(psl->suffix_exceptions, &suffix)) != 0)
						return 0; /* found an exception, so 'domain' is not a public suffix */
				}

				return 1;
			}
		}
	}

	return 0;
}