static void _add_punycode_if_needed(_psl_vector_t *v) { int it, n; /* do not use 'it < v->cur' since v->cur is changed by _vector_add() ! */ for (it = 0, n = v->cur; it < n; it++) { _psl_entry_t *e = _vector_get(v, it); if (_str_needs_encoding(e->label_buf)) { _psl_entry_t suffix, *suffixp; char lookupname[64] = ""; /* this is much slower than the libidn2 API but should have no license issues */ FILE *pp; char cmd[16 + sizeof(e->label_buf)]; snprintf(cmd, sizeof(cmd), "idn2 '%s'", e->label_buf); if ((pp = popen(cmd, "r"))) { if (fscanf(pp, "%63s", lookupname) >= 1 && strcmp(e->label_buf, lookupname)) { /* fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, lookupname); */ _suffix_init(&suffix, lookupname, strlen(lookupname)); suffix.wildcard = e->wildcard; suffixp = _vector_get(v, _vector_add(v, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ } pclose(pp); } else fprintf(stderr, "Failed to call popen(%s, \"r\")\n", cmd); } } _vector_sort(v); }
static int _print_psl_entries_dafsa_binary(const char *fname, const _psl_vector_t *v) { FILE *fp; int ret = 0, it, rc; char cmd[256]; if ((fp = fopen("in.tmp", "w"))) { for (it = 0; it < v->cur; it++) { _psl_entry_t *e = _vector_get(v, it); unsigned char *s = (unsigned char *)e->label_buf; /* search for non-ASCII label and skip it */ while (*s && *s < 128) s++; if (*s) continue; fprintf(fp, "%s, %X\n", e->label_buf, (int) (e->flags & 0x0F)); } fclose(fp); } else { fprintf(stderr, "Failed to write open 'in.tmp'\n"); return 3; } snprintf(cmd, sizeof(cmd), MAKE_DAFSA " --binary in.tmp %s", fname); if ((rc = system(cmd))) { fprintf(stderr, "Failed to execute '%s' (%d)\n", cmd, rc); ret = 2; } unlink("in.tmp"); return ret; }
static void _print_psl_entries(FILE *fpout, const _psl_vector_t *v, const char *varname) { int it; #ifdef BUILTIN_GENERATOR_LIBICU do { UVersionInfo version_info; char version[U_MAX_VERSION_STRING_LENGTH]; u_getVersion(version_info); u_versionToString(version_info, version); fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libicu/%s) */\n", version); } while (0); #elif defined(BUILTIN_GENERATOR_LIBIDN2) fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn2/%s) */\n", idn2_check_version(NULL)); #elif defined(BUILTIN_GENERATOR_LIBIDN) fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn/%s) */\n", stringprep_check_version(NULL)); #else fprintf(fpout, "/* automatically generated by psl2c (without punycode support) */\n"); #endif fprintf(fpout, "static _psl_entry_t %s[] = {\n", varname); for (it = 0; it < v->cur; it++) { _psl_entry_t *e = _vector_get(v, it); fprintf(fpout, "\t{ \"%s\", NULL, %hd, %d, %d },\n", e->label_buf, e->length, (int) e->nlabels, (int) e->flags); } fprintf(fpout, "};\n"); }
void *_vector_push(struct vector *v, size_t elemsize) { if (v->count == v->allocated_count) { if (!vector_grow(v)) { return NULL; } } vector_resize(v, v->count+1); return _vector_get(v, elemsize, v->count-1); }
static void _print_psl_entries_dafsa(FILE *fpout, const _psl_vector_t *v) { FILE *fp; int it; #ifdef BUILTIN_GENERATOR_LIBICU do { UVersionInfo version_info; char version[U_MAX_VERSION_STRING_LENGTH]; u_getVersion(version_info); u_versionToString(version_info, version); fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libicu/%s) */\n", version); } while (0); #elif defined(BUILTIN_GENERATOR_LIBIDN2) fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn2/%s) */\n", idn2_check_version(NULL)); #elif defined(BUILTIN_GENERATOR_LIBIDN) fprintf(fpout, "/* automatically generated by psl2c (punycode generated with libidn/%s) */\n", stringprep_check_version(NULL)); #else fprintf(fpout, "/* automatically generated by psl2c (punycode generated internally) */\n"); #endif if ((fp = fopen("in.tmp", "w"))) { for (it = 0; it < v->cur; it++) { _psl_entry_t *e = _vector_get(v, it); unsigned char *s = (unsigned char *)e->label_buf; /* search for non-ASCII label and skip it */ while (*s && *s < 128) s++; if (*s) continue; fprintf(fp, "%s, %X\n", e->label_buf, (int) (e->flags & 0x0F)); } fclose(fp); } if ((it = system(MAKE_DAFSA " in.tmp out.tmp"))) fprintf(stderr, "Failed to execute " MAKE_DAFSA "\n"); if ((fp = fopen("out.tmp", "r"))) { char buf[256]; while (fgets(buf, sizeof(buf), fp)) fputs(buf, fpout); fclose(fp); } unlink("in.tmp"); unlink("out.tmp"); }
static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e) { char *lookupname = NULL; int rc; uint8_t *lower, resbuf[256]; size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */ if (_str_is_ascii(e->label_buf)) return; /* we need a conversion to lowercase */ lower = u8_tolower((uint8_t *)e->label_buf, u8_strlen((uint8_t *)e->label_buf), 0, UNINORM_NFKC, resbuf, &len); if (!lower) { /* fprintf(stderr, "u8_tolower(%s) failed (%d)\n", e->label_buf, errno); */ return; } /* u8_tolower() does not terminate the result string */ if (lower == resbuf) { lower[len]=0; } else { uint8_t *tmp = lower; lower = (uint8_t *)strndup((char *)lower, len); free(tmp); } if ((rc = idn2_lookup_u8(lower, (uint8_t **)&lookupname, 0)) == IDN2_OK) { if (strcmp(e->label_buf, lookupname)) { _psl_entry_t suffix, *suffixp; /* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */ _suffix_init(&suffix, lookupname, strlen(lookupname)); suffix.wildcard = e->wildcard; suffixp = _vector_get(v, _vector_add(v, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ } /* else ignore */ } /* else fprintf(stderr, "toASCII(%s) failed (%d): %s\n", lower, rc, idn2_strerror(rc)); */ if (lower != resbuf) free(lower); }
static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e) { char *lookupname = NULL; int rc; if (_str_is_ascii(e->label_buf)) return; /* idna_to_ascii_8z() automatically converts UTF-8 to lowercase */ if ((rc = idna_to_ascii_8z(e->label_buf, &lookupname, IDNA_USE_STD3_ASCII_RULES)) == IDNA_SUCCESS) { if (strcmp(e->label_buf, lookupname)) { _psl_entry_t suffix, *suffixp; /* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */ _suffix_init(&suffix, lookupname, strlen(lookupname)); suffix.wildcard = e->wildcard; suffixp = _vector_get(v, _vector_add(v, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ } /* else ignore */ } /* else fprintf(_(stderr, "toASCII failed (%d): %s\n"), rc, idna_strerror(rc)); */ }
static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t *e) { if (_str_is_ascii(e->label_buf)) return; /* IDNA2008 UTS#46 punycode conversion */ if (idna) { char lookupname[128] = ""; UErrorCode status = 0; UIDNAInfo info = UIDNA_INFO_INITIALIZER; UChar utf16_dst[128], utf16_src[128]; int32_t utf16_src_length; u_strFromUTF8(utf16_src, sizeof(utf16_src)/sizeof(utf16_src[0]), &utf16_src_length, e->label_buf, -1, &status); if (U_SUCCESS(status)) { int32_t dst_length = uidna_nameToASCII(idna, utf16_src, utf16_src_length, utf16_dst, sizeof(utf16_dst)/sizeof(utf16_dst[0]), &info, &status); if (U_SUCCESS(status)) { u_strToUTF8(lookupname, sizeof(lookupname), NULL, utf16_dst, dst_length, &status); if (U_SUCCESS(status)) { if (strcmp(e->label_buf, lookupname)) { _psl_entry_t suffix, *suffixp; /* fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */ _suffix_init(&suffix, lookupname, strlen(lookupname)); suffix.wildcard = e->wildcard; suffixp = _vector_get(v, _vector_add(v, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ } /* else ignore */ } /* else fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */ } /* else fprintf(stderr, "Failed to convert to ASCII (status %d)\n", status); */ } /* else fprintf(stderr, "Failed to convert UTF-8 to UTF-16 (status %d)\n", status); */ } }
static int _check_psl(const psl_ctx_t *psl) { int it, pos, err = 0; /* check if plain suffix also appears in exceptions */ for (it = 0; it < psl->suffixes->cur; it++) { _psl_entry_t *e = _vector_get(psl->suffixes, it); if (!e->wildcard && _vector_find(psl->suffix_exceptions, e) >= 0) { fprintf(stderr, "Found entry '%s' also in exceptions\n", e->label); err = 1; } } /* check if exception also appears in suffix list as plain entry */ for (it = 0; it < psl->suffix_exceptions->cur; it++) { _psl_entry_t *e2, *e = _vector_get(psl->suffix_exceptions, it); if ((e2 = _vector_get(psl->suffixes, pos = _vector_find(psl->suffixes, e)))) { if (!e2->wildcard) { fprintf(stderr, "Found exception '!%s' also as suffix\n", e->label); err = 1; } /* Two same domains in a row are allowed: wildcard and non-wildcard. * Binary search find either of them, so also check previous and next entry. */ else if (pos > 0 && _suffix_compare(e, e2 = _vector_get(psl->suffixes, pos - 1)) == 0 && !e2->wildcard) { fprintf(stderr, "Found exception '!%s' also as suffix\n", e->label); err = 1; } else if (pos < psl->suffixes->cur - 1 && _suffix_compare(e, e2 = _vector_get(psl->suffixes, pos + 1)) == 0 && !e2->wildcard) { fprintf(stderr, "Found exception '!%s' also as suffix\n", e->label); err = 1; } } } /* check if non-wildcard entry is already covered by wildcard entry */ for (it = 0; it < psl->suffixes->cur; it++) { const char *p; _psl_entry_t *e = _vector_get(psl->suffixes, it); if (e->nlabels > 1 && !e->wildcard && (p = strchr(e->label, '.'))) { _psl_entry_t *e2, *e3, suffix; suffix.label = p + 1; suffix.length = strlen(p + 1); suffix.nlabels = e->nlabels - 1; e2 = _vector_get(psl->suffixes, pos = _vector_find(psl->suffixes, &suffix)); if (e2) { if (e2->wildcard) { fprintf(stderr, "Found superfluous '%s' already covered by '*.%s'\n", e->label, e2->label); err = 1; } /* Two same domains in a row are allowed: wildcard and non-wildcard. * Binary search find either of them, so also check previous and next entry. */ else if (pos > 0 && _suffix_compare(e2, e3 = _vector_get(psl->suffixes, pos - 1)) == 0 && e3->wildcard) { fprintf(stderr, "Found superfluous '%s' already covered by '*.%s'\n", e->label, e2->label); err = 1; } else if (pos < psl->suffixes->cur - 1 && _suffix_compare(e2, e3 = _vector_get(psl->suffixes, pos + 1)) == 0 && e3->wildcard) { fprintf(stderr, "Found superfluous '%s' already covered by '*.%s'\n", e->label, e2->label); err = 1; } } } } return err; }
extern void* vector_get(vector_t* v, size_t index) { return *(void**) _vector_get(v, index); }
/** * psl_load_fp: * @fp: FILE pointer * * This function loads the public suffixes from a FILE pointer. * To free the allocated resources, call psl_free(). * * The suffixes are expected to be lowercase UTF-8 encoded if they are international. * * Returns: Pointer to a PSL context or %NULL on failure. * * Since: 0.1 */ psl_ctx_t *psl_load_fp(FILE *fp) { psl_ctx_t *psl; _psl_entry_t suffix, *suffixp; char buf[256], *linep, *p; #ifdef WITH_LIBICU UIDNA *idna; UErrorCode status = 0; #endif if (!fp) return NULL; if (!(psl = calloc(1, sizeof(psl_ctx_t)))) return NULL; #ifdef WITH_LIBICU idna = uidna_openUTS46(UIDNA_USE_STD3_RULES, &status); #endif /* * as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions. * as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions. */ psl->suffixes = _vector_alloc(8*1024, _suffix_compare); psl->suffix_exceptions = _vector_alloc(64, _suffix_compare); while ((linep = fgets(buf, sizeof(buf), fp))) { while (isspace(*linep)) linep++; /* ignore leading whitespace */ if (!*linep) continue; /* skip empty lines */ if (*linep == '/' && linep[1] == '/') continue; /* skip comments */ /* parse suffix rule */ for (p = linep; *linep && !isspace(*linep);) linep++; *linep = 0; if (*p == '!') { /* add to exceptions */ if (_suffix_init(&suffix, p + 1, linep - p - 1) == 0) { suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ #ifdef WITH_LIBICU _add_punycode_if_needed(idna, psl->suffix_exceptions, suffixp); #elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) _add_punycode_if_needed(psl->suffix_exceptions, suffixp); #endif } } else { /* add to suffixes */ if (_suffix_init(&suffix, p, linep - p) == 0) { suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ #ifdef WITH_LIBICU _add_punycode_if_needed(idna, psl->suffixes, suffixp); #elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) _add_punycode_if_needed(psl->suffixes, suffixp); #endif } } } _vector_sort(psl->suffix_exceptions); _vector_sort(psl->suffixes); #ifdef WITH_LIBICU if (idna) uidna_close(idna); #endif return psl; }
static int _psl_is_public_suffix(const psl_ctx_t *psl, const char *domain) { _psl_entry_t suffix, *rule; const char *p, *label_bak; unsigned short length_bak; /* this function should be called without leading dots, just make sure */ suffix.label = domain + (*domain == '.'); suffix.length = strlen(suffix.label); suffix.wildcard = 0; suffix.nlabels = 1; for (p = suffix.label; *p; p++) if (*p == '.') suffix.nlabels++; /* if domain has enough labels, it is public */ if (psl == &_builtin_psl) rule = &suffixes[0]; else rule = _vector_get(psl->suffixes, 0); if (!rule || rule->nlabels < suffix.nlabels - 1) return 0; if (psl == &_builtin_psl) rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare); else rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix)); if (rule) { /* definitely a match, no matter if the found rule is a wildcard or not */ return 1; } else if (suffix.nlabels == 1) { /* unknown TLD, this is the prevailing '*' match */ return 1; } label_bak = suffix.label; length_bak = suffix.length; if ((suffix.label = strchr(suffix.label, '.'))) { suffix.label++; suffix.length = strlen(suffix.label); suffix.nlabels--; if (psl == &_builtin_psl) rule = bsearch(&suffix, suffixes, countof(suffixes), sizeof(suffixes[0]), (int(*)(const void *, const void *))_suffix_compare); else rule = _vector_get(psl->suffixes, _vector_find(psl->suffixes, &suffix)); if (rule) { if (rule->wildcard) { /* now that we matched a wildcard, we have to check for an exception */ suffix.label = label_bak; suffix.length = length_bak; suffix.nlabels++; if (psl == &_builtin_psl) { if (bsearch(&suffix, suffix_exceptions, countof(suffix_exceptions), sizeof(suffix_exceptions[0]), (int(*)(const void *, const void *))_suffix_compare)) return 0; /* found an exception, so 'domain' is not a public suffix */ } else { if (_vector_get(psl->suffix_exceptions, _vector_find(psl->suffix_exceptions, &suffix)) != 0) return 0; /* found an exception, so 'domain' is not a public suffix */ } return 1; } } } return 0; }