static void _add_punycode_if_needed(_psl_vector_t *v) { int it, n; /* do not use 'it < v->cur' since v->cur is changed by _vector_add() ! */ for (it = 0, n = v->cur; it < n; it++) { _psl_entry_t *e = _vector_get(v, it); if (_str_needs_encoding(e->label_buf)) { _psl_entry_t suffix, *suffixp; char lookupname[64] = ""; /* this is much slower than the libidn2 API but should have no license issues */ FILE *pp; char cmd[16 + sizeof(e->label_buf)]; snprintf(cmd, sizeof(cmd), "idn2 '%s'", e->label_buf); if ((pp = popen(cmd, "r"))) { if (fscanf(pp, "%63s", lookupname) >= 1 && strcmp(e->label_buf, lookupname)) { /* fprintf(stderr, "idn2 '%s' -> '%s'\n", e->label_buf, lookupname); */ _suffix_init(&suffix, lookupname, strlen(lookupname)); suffix.wildcard = e->wildcard; suffixp = _vector_get(v, _vector_add(v, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ } pclose(pp); } else fprintf(stderr, "Failed to call popen(%s, \"r\")\n", cmd); } } _vector_sort(v); }
static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e) { char *lookupname = NULL; int rc; uint8_t *lower, resbuf[256]; size_t len = sizeof(resbuf) - 1; /* leave space for additional \0 byte */ if (_str_is_ascii(e->label_buf)) return; /* we need a conversion to lowercase */ lower = u8_tolower((uint8_t *)e->label_buf, u8_strlen((uint8_t *)e->label_buf), 0, UNINORM_NFKC, resbuf, &len); if (!lower) { /* fprintf(stderr, "u8_tolower(%s) failed (%d)\n", e->label_buf, errno); */ return; } /* u8_tolower() does not terminate the result string */ if (lower == resbuf) { lower[len]=0; } else { uint8_t *tmp = lower; lower = (uint8_t *)strndup((char *)lower, len); free(tmp); } if ((rc = idn2_lookup_u8(lower, (uint8_t **)&lookupname, 0)) == IDN2_OK) { if (strcmp(e->label_buf, lookupname)) { _psl_entry_t suffix, *suffixp; /* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */ _suffix_init(&suffix, lookupname, strlen(lookupname)); suffix.wildcard = e->wildcard; suffixp = _vector_get(v, _vector_add(v, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ } /* else ignore */ } /* else fprintf(stderr, "toASCII(%s) failed (%d): %s\n", lower, rc, idn2_strerror(rc)); */ if (lower != resbuf) free(lower); }
static void _add_punycode_if_needed(_psl_vector_t *v, _psl_entry_t *e) { char *lookupname = NULL; int rc; if (_str_is_ascii(e->label_buf)) return; /* idna_to_ascii_8z() automatically converts UTF-8 to lowercase */ if ((rc = idna_to_ascii_8z(e->label_buf, &lookupname, IDNA_USE_STD3_ASCII_RULES)) == IDNA_SUCCESS) { if (strcmp(e->label_buf, lookupname)) { _psl_entry_t suffix, *suffixp; /* fprintf(stderr, "libidn '%s' -> '%s'\n", e->label_buf, lookupname); */ _suffix_init(&suffix, lookupname, strlen(lookupname)); suffix.wildcard = e->wildcard; suffixp = _vector_get(v, _vector_add(v, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ } /* else ignore */ } /* else fprintf(_(stderr, "toASCII failed (%d): %s\n"), rc, idna_strerror(rc)); */ }
static void _add_punycode_if_needed(UIDNA *idna, _psl_vector_t *v, _psl_entry_t *e) { if (_str_is_ascii(e->label_buf)) return; /* IDNA2008 UTS#46 punycode conversion */ if (idna) { char lookupname[128] = ""; UErrorCode status = 0; UIDNAInfo info = UIDNA_INFO_INITIALIZER; UChar utf16_dst[128], utf16_src[128]; int32_t utf16_src_length; u_strFromUTF8(utf16_src, sizeof(utf16_src)/sizeof(utf16_src[0]), &utf16_src_length, e->label_buf, -1, &status); if (U_SUCCESS(status)) { int32_t dst_length = uidna_nameToASCII(idna, utf16_src, utf16_src_length, utf16_dst, sizeof(utf16_dst)/sizeof(utf16_dst[0]), &info, &status); if (U_SUCCESS(status)) { u_strToUTF8(lookupname, sizeof(lookupname), NULL, utf16_dst, dst_length, &status); if (U_SUCCESS(status)) { if (strcmp(e->label_buf, lookupname)) { _psl_entry_t suffix, *suffixp; /* fprintf(stderr, "libicu '%s' -> '%s'\n", e->label_buf, lookupname); */ _suffix_init(&suffix, lookupname, strlen(lookupname)); suffix.wildcard = e->wildcard; suffixp = _vector_get(v, _vector_add(v, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ } /* else ignore */ } /* else fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */ } /* else fprintf(stderr, "Failed to convert to ASCII (status %d)\n", status); */ } /* else fprintf(stderr, "Failed to convert UTF-8 to UTF-16 (status %d)\n", status); */ } }
/** * psl_load_fp: * @fp: FILE pointer * * This function loads the public suffixes from a FILE pointer. * To free the allocated resources, call psl_free(). * * The suffixes are expected to be lowercase UTF-8 encoded if they are international. * * Returns: Pointer to a PSL context or %NULL on failure. * * Since: 0.1 */ psl_ctx_t *psl_load_fp(FILE *fp) { psl_ctx_t *psl; _psl_entry_t suffix, *suffixp; char buf[256], *linep, *p; #ifdef WITH_LIBICU UIDNA *idna; UErrorCode status = 0; #endif if (!fp) return NULL; if (!(psl = calloc(1, sizeof(psl_ctx_t)))) return NULL; #ifdef WITH_LIBICU idna = uidna_openUTS46(UIDNA_USE_STD3_RULES, &status); #endif /* * as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions. * as of 19.02.2014, the list at http://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions. */ psl->suffixes = _vector_alloc(8*1024, _suffix_compare); psl->suffix_exceptions = _vector_alloc(64, _suffix_compare); while ((linep = fgets(buf, sizeof(buf), fp))) { while (isspace(*linep)) linep++; /* ignore leading whitespace */ if (!*linep) continue; /* skip empty lines */ if (*linep == '/' && linep[1] == '/') continue; /* skip comments */ /* parse suffix rule */ for (p = linep; *linep && !isspace(*linep);) linep++; *linep = 0; if (*p == '!') { /* add to exceptions */ if (_suffix_init(&suffix, p + 1, linep - p - 1) == 0) { suffixp = _vector_get(psl->suffix_exceptions, _vector_add(psl->suffix_exceptions, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ #ifdef WITH_LIBICU _add_punycode_if_needed(idna, psl->suffix_exceptions, suffixp); #elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) _add_punycode_if_needed(psl->suffix_exceptions, suffixp); #endif } } else { /* add to suffixes */ if (_suffix_init(&suffix, p, linep - p) == 0) { suffixp = _vector_get(psl->suffixes, _vector_add(psl->suffixes, &suffix)); suffixp->label = suffixp->label_buf; /* set label to changed address */ #ifdef WITH_LIBICU _add_punycode_if_needed(idna, psl->suffixes, suffixp); #elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) _add_punycode_if_needed(psl->suffixes, suffixp); #endif } } } _vector_sort(psl->suffix_exceptions); _vector_sort(psl->suffixes); #ifdef WITH_LIBICU if (idna) uidna_close(idna); #endif return psl; }