static void test_psl(void) { FILE *fp; const psl_ctx_t *psl; char buf[256], domain[128], expected_regdom[128]; psl = psl_builtin(); printf("have %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl)); /* special check with NULL values */ test(NULL, NULL, NULL); /* special check with NULL psl context */ test(NULL, "www.example.com", NULL); /* special check with NULL psl context and TLD */ test(NULL, "com", NULL); /* Norwegian with uppercase oe */ #ifdef WITH_LIBICU test(psl, "www.\303\230yer.no", "www.\303\270yer.no"); #endif /* Norwegian with lowercase oe */ test(psl, "www.\303\270yer.no", "www.\303\270yer.no"); /* special check with NULL psl context and TLD */ test(psl, "whoever.forgot.his.name", "whoever.forgot.his.name"); /* special check with NULL psl context and TLD */ test(psl, "forgot.his.name", NULL); /* special check with NULL psl context and TLD */ test(psl, "his.name", "his.name"); if ((fp = fopen(PSL_TESTFILE, "r"))) { while ((fgets(buf, sizeof(buf), fp))) { if (sscanf(buf, " checkPublicSuffix('%127[^']' , '%127[^']", domain, expected_regdom) != 2) { if (sscanf(buf, " checkPublicSuffix('%127[^']' , %127[nul]", domain, expected_regdom) != 2) continue; } if (!strcmp(expected_regdom, "null")) test(psl, domain, NULL); else test(psl, domain, expected_regdom); } fclose(fp); } else { printf("Failed to open %s\n", PSL_TESTFILE); failed++; } }
/*
 * Set up a cookie database.
 *
 * cookie_db: storage to initialize, or NULL to have it allocated with
 *            xmalloc().
 *
 * The structure is zeroed, then given a cookie vector (ordered by
 * _compare_cookie, elements released through mget_cookie_deinit) and an
 * initialized mutex. When built WITH_LIBPSL the builtin PSL context is
 * attached as well.
 *
 * Returns the initialized database (the argument, or the new allocation).
 */
mget_cookie_db_t *mget_cookie_db_init(mget_cookie_db_t *cookie_db)
{
	mget_cookie_db_t *db = cookie_db;

	if (db == NULL)
		db = xmalloc(sizeof(mget_cookie_db_t));

	/* start from a clean slate, whoever provided the memory */
	memset(db, 0, sizeof(*db));

	db->cookies = mget_vector_create(32, -2, (int(*)(const void *, const void *))_compare_cookie);
	mget_vector_set_destructor(db->cookies, (void(*)(void *))mget_cookie_deinit);

	mget_thread_mutex_init(&db->mutex);

#ifdef WITH_LIBPSL
	db->psl = (psl_ctx_t *)psl_builtin();
#endif

	return db;
}
static void test_psl(void) { FILE *fp; psl_ctx_t *psl, *psl3, *psl4, *psl5; const psl_ctx_t *psl2; int type = 0; char buf[256], *linep, *p; psl = psl_load_file(PSL_FILE); /* PSL_FILE can be set by ./configure --with-psl-file=[PATH] */ printf("loaded %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl)); psl2 = psl_builtin(); printf("builtin PSL has %d suffixes and %d exceptions\n", psl_suffix_count(psl2), psl_suffix_exception_count(psl2)); if (!(psl3 = psl_load_file(PSL_DAFSA))) { fprintf(stderr, "Failed to load 'psl.dafsa'\n"); failed++; } if (!(psl4 = psl_load_file(PSL_ASCII_DAFSA))) { fprintf(stderr, "Failed to load 'psl_ascii.dafsa'\n"); failed++; } psl5 = psl_latest("psl.dafsa"); if ((fp = fopen(PSL_FILE, "r"))) { #ifdef HAVE_CLOCK_GETTIME clock_gettime(CLOCK_REALTIME, &ts1); #endif while ((linep = fgets(buf, sizeof(buf), fp))) { while (_isspace_ascii(*linep)) linep++; /* ignore leading whitespace */ if (!*linep) continue; /* skip empty lines */ if (*linep == '/' && linep[1] == '/') { if (!type) { if (strstr(linep + 2, "===BEGIN ICANN DOMAINS===")) type = PSL_TYPE_ICANN; else if (!type && strstr(linep + 2, "===BEGIN PRIVATE DOMAINS===")) type = PSL_TYPE_PRIVATE; } else if (type == PSL_TYPE_ICANN && strstr(linep + 2, "===END ICANN DOMAINS===")) type = 0; else if (type == PSL_TYPE_PRIVATE && strstr(linep + 2, "===END PRIVATE DOMAINS===")) type = 0; continue; /* skip comments */ } /* parse suffix rule */ for (p = linep; *linep && !_isspace_ascii(*linep);) linep++; *linep = 0; test_psl_entry(psl, p, type); if (psl2) test_psl_entry(psl2, p, type); if (psl3) test_psl_entry(psl3, p, type); if (psl4) test_psl_entry(psl4, p, type); if (psl5) test_psl_entry(psl5, p, type); } #ifdef HAVE_CLOCK_GETTIME clock_gettime(CLOCK_REALTIME, &ts2); #endif fclose(fp); } else { printf("Failed to open %s\n", PSL_FILE); failed++; } psl_free(psl5); psl_free(psl4); psl_free(psl3); psl_free((psl_ctx_t *)psl2); psl_free(psl); }
struct Cookie * Curl_cookie_add(struct Curl_easy *data, /* The 'data' pointer here may be NULL at times, and thus must only be used very carefully for things that can deal with data being NULL. Such as infof() and similar */ struct CookieInfo *c, bool httpheader, /* TRUE if HTTP header-style line */ bool noexpire, /* if TRUE, skip remove_expired() */ char *lineptr, /* first character of the line */ const char *domain, /* default domain */ const char *path) /* full path used when this cookie is set, used to get default path for the cookie unless set */ { struct Cookie *clist; struct Cookie *co; struct Cookie *lastc = NULL; time_t now = time(NULL); bool replace_old = FALSE; bool badcookie = FALSE; /* cookies are good by default. mmmmm yummy */ size_t myhash; #ifdef USE_LIBPSL const psl_ctx_t *psl; #endif #ifdef CURL_DISABLE_VERBOSE_STRINGS (void)data; #endif /* First, alloc and init a new struct for it */ co = calloc(1, sizeof(struct Cookie)); if(!co) return NULL; /* bail out if we're this low on memory */ if(httpheader) { /* This line was read off a HTTP-header */ char name[MAX_NAME]; char what[MAX_NAME]; const char *ptr; const char *semiptr; size_t linelength = strlen(lineptr); if(linelength > MAX_COOKIE_LINE) { /* discard overly long lines at once */ free(co); return NULL; } semiptr = strchr(lineptr, ';'); /* first, find a semicolon */ while(*lineptr && ISBLANK(*lineptr)) lineptr++; ptr = lineptr; do { /* we have a <what>=<this> pair or a stand-alone word here */ name[0] = what[0] = 0; /* init the buffers */ if(1 <= sscanf(ptr, "%" MAX_NAME_TXT "[^;\r\n=] =%" MAX_NAME_TXT "[^;\r\n]", name, what)) { /* Use strstore() below to properly deal with received cookie headers that have the same string property set more than once, and then we use the last one. 
*/ const char *whatptr; bool done = FALSE; bool sep; size_t len = strlen(what); size_t nlen = strlen(name); const char *endofn = &ptr[ nlen ]; if(nlen >= (MAX_NAME-1) || len >= (MAX_NAME-1) || ((nlen + len) > MAX_NAME)) { /* too long individual name or contents, or too long combination of name + contents. Chrome and Firefox support 4095 or 4096 bytes combo. */ freecookie(co); infof(data, "oversized cookie dropped, name/val %d + %d bytes\n", nlen, len); return NULL; } /* name ends with a '=' ? */ sep = (*endofn == '=')?TRUE:FALSE; if(nlen) { endofn--; /* move to the last character */ if(ISBLANK(*endofn)) { /* skip trailing spaces in name */ while(*endofn && ISBLANK(*endofn) && nlen) { endofn--; nlen--; } name[nlen] = 0; /* new end of name */ } } /* Strip off trailing whitespace from the 'what' */ while(len && ISBLANK(what[len-1])) { what[len-1] = 0; len--; } /* Skip leading whitespace from the 'what' */ whatptr = what; while(*whatptr && ISBLANK(*whatptr)) whatptr++; if(!co->name && sep) { /* The very first name/value pair is the actual cookie name */ co->name = strdup(name); co->value = strdup(whatptr); if(!co->name || !co->value) { badcookie = TRUE; break; } } else if(!len) { /* this was a "<name>=" with no content, and we must allow 'secure' and 'httponly' specified this weirdly */ done = TRUE; if(strcasecompare("secure", name)) co->secure = TRUE; else if(strcasecompare("httponly", name)) co->httponly = TRUE; else if(sep) /* there was a '=' so we're not done parsing this field */ done = FALSE; } if(done) ; else if(strcasecompare("path", name)) { strstore(&co->path, whatptr); if(!co->path) { badcookie = TRUE; /* out of memory bad */ break; } free(co->spath); /* if this is set again */ co->spath = sanitize_cookie_path(co->path); if(!co->spath) { badcookie = TRUE; /* out of memory bad */ break; } } else if(strcasecompare("domain", name)) { bool is_ip; /* Now, we make sure that our host is within the given domain, or the given domain is not valid and thus cannot be 
set. */ if('.' == whatptr[0]) whatptr++; /* ignore preceding dot */ #ifndef USE_LIBPSL /* * Without PSL we don't know when the incoming cookie is set on a * TLD or otherwise "protected" suffix. To reduce risk, we require a * dot OR the exact host name being "localhost". */ { const char *dotp; /* check for more dots */ dotp = strchr(whatptr, '.'); if(!dotp && !strcasecompare("localhost", whatptr)) domain = ":"; } #endif is_ip = isip(domain ? domain : whatptr); if(!domain || (is_ip && !strcmp(whatptr, domain)) || (!is_ip && tailmatch(whatptr, domain))) { strstore(&co->domain, whatptr); if(!co->domain) { badcookie = TRUE; break; } if(!is_ip) co->tailmatch = TRUE; /* we always do that if the domain name was given */ } else { /* we did not get a tailmatch and then the attempted set domain is not a domain to which the current host belongs. Mark as bad. */ badcookie = TRUE; infof(data, "skipped cookie with bad tailmatch domain: %s\n", whatptr); } } else if(strcasecompare("version", name)) { strstore(&co->version, whatptr); if(!co->version) { badcookie = TRUE; break; } } else if(strcasecompare("max-age", name)) { /* Defined in RFC2109: Optional. The Max-Age attribute defines the lifetime of the cookie, in seconds. The delta-seconds value is a decimal non- negative integer. After delta-seconds seconds elapse, the client should discard the cookie. A value of zero means the cookie should be discarded immediately. */ strstore(&co->maxage, whatptr); if(!co->maxage) { badcookie = TRUE; break; } } else if(strcasecompare("expires", name)) { strstore(&co->expirestr, whatptr); if(!co->expirestr) { badcookie = TRUE; break; } } /* else this is the second (or more) name we don't know about! 
*/ } else { /* this is an "illegal" <what>=<this> pair */ } if(!semiptr || !*semiptr) { /* we already know there are no more cookies */ semiptr = NULL; continue; } ptr = semiptr + 1; while(*ptr && ISBLANK(*ptr)) ptr++; semiptr = strchr(ptr, ';'); /* now, find the next semicolon */ if(!semiptr && *ptr) /* There are no more semicolons, but there's a final name=value pair coming up */ semiptr = strchr(ptr, '\0'); } while(semiptr); if(co->maxage) { CURLofft offt; offt = curlx_strtoofft((*co->maxage == '\"')? &co->maxage[1]:&co->maxage[0], NULL, 10, &co->expires); if(offt == CURL_OFFT_FLOW) /* overflow, used max value */ co->expires = CURL_OFF_T_MAX; else if(!offt) { if(CURL_OFF_T_MAX - now < co->expires) /* would overflow */ co->expires = CURL_OFF_T_MAX; else co->expires += now; } } else if(co->expirestr) { /* Note that if the date couldn't get parsed for whatever reason, the cookie will be treated as a session cookie */ co->expires = curl_getdate(co->expirestr, NULL); /* Session cookies have expires set to 0 so if we get that back from the date parser let's add a second to make it a non-session cookie */ if(co->expires == 0) co->expires = 1; else if(co->expires < 0) co->expires = 0; } if(!badcookie && !co->domain) { if(domain) { /* no domain was given in the header line, set the default */ co->domain = strdup(domain); if(!co->domain) badcookie = TRUE; } } if(!badcookie && !co->path && path) { /* No path was given in the header line, set the default. Note that the passed-in path to this function MAY have a '?' and following part that MUST not be stored as part of the path. 
*/ char *queryp = strchr(path, '?'); /* queryp is where the interesting part of the path ends, so now we want to the find the last */ char *endslash; if(!queryp) endslash = strrchr(path, '/'); else endslash = memrchr(path, '/', (size_t)(queryp - path)); if(endslash) { size_t pathlen = (size_t)(endslash-path + 1); /* include end slash */ co->path = malloc(pathlen + 1); /* one extra for the zero byte */ if(co->path) { memcpy(co->path, path, pathlen); co->path[pathlen] = 0; /* zero terminate */ co->spath = sanitize_cookie_path(co->path); if(!co->spath) badcookie = TRUE; /* out of memory bad */ } else badcookie = TRUE; } } if(badcookie || !co->name) { /* we didn't get a cookie name or a bad one, this is an illegal line, bail out */ freecookie(co); return NULL; } } else { /* This line is NOT a HTTP header style line, we do offer support for reading the odd netscape cookies-file format here */ char *ptr; char *firstptr; char *tok_buf = NULL; int fields; /* IE introduced HTTP-only cookies to prevent XSS attacks. Cookies marked with httpOnly after the domain name are not accessible from javascripts, but since curl does not operate at javascript level, we include them anyway. In Firefox's cookie files, these lines are preceded with #HttpOnly_ and then everything is as usual, so we skip 10 characters of the line.. 
*/ if(strncmp(lineptr, "#HttpOnly_", 10) == 0) { lineptr += 10; co->httponly = TRUE; } if(lineptr[0]=='#') { /* don't even try the comments */ free(co); return NULL; } /* strip off the possible end-of-line characters */ ptr = strchr(lineptr, '\r'); if(ptr) *ptr = 0; /* clear it */ ptr = strchr(lineptr, '\n'); if(ptr) *ptr = 0; /* clear it */ firstptr = strtok_r(lineptr, "\t", &tok_buf); /* tokenize it on the TAB */ /* Now loop through the fields and init the struct we already have allocated */ for(ptr = firstptr, fields = 0; ptr && !badcookie; ptr = strtok_r(NULL, "\t", &tok_buf), fields++) { switch(fields) { case 0: if(ptr[0]=='.') /* skip preceding dots */ ptr++; co->domain = strdup(ptr); if(!co->domain) badcookie = TRUE; break; case 1: /* This field got its explanation on the 23rd of May 2001 by Andrés García: flag: A TRUE/FALSE value indicating if all machines within a given domain can access the variable. This value is set automatically by the browser, depending on the value you set for the domain. As far as I can see, it is set to true when the cookie says .domain.com and to false when the domain is complete www.domain.com */ co->tailmatch = strcasecompare(ptr, "TRUE")?TRUE:FALSE; break; case 2: /* It turns out, that sometimes the file format allows the path field to remain not filled in, we try to detect this and work around it! Andrés García made us aware of this... */ if(strcmp("TRUE", ptr) && strcmp("FALSE", ptr)) { /* only if the path doesn't look like a boolean option! */ co->path = strdup(ptr); if(!co->path) badcookie = TRUE; else { co->spath = sanitize_cookie_path(co->path); if(!co->spath) { badcookie = TRUE; /* out of memory bad */ } } break; } /* this doesn't look like a path, make one up! 
*/ co->path = strdup("/"); if(!co->path) badcookie = TRUE; co->spath = strdup("/"); if(!co->spath) badcookie = TRUE; fields++; /* add a field and fall down to secure */ /* FALLTHROUGH */ case 3: co->secure = strcasecompare(ptr, "TRUE")?TRUE:FALSE; break; case 4: if(curlx_strtoofft(ptr, NULL, 10, &co->expires)) badcookie = TRUE; break; case 5: co->name = strdup(ptr); if(!co->name) badcookie = TRUE; break; case 6: co->value = strdup(ptr); if(!co->value) badcookie = TRUE; break; } } if(6 == fields) { /* we got a cookie with blank contents, fix it */ co->value = strdup(""); if(!co->value) badcookie = TRUE; else fields++; } if(!badcookie && (7 != fields)) /* we did not find the sufficient number of fields */ badcookie = TRUE; if(badcookie) { freecookie(co); return NULL; } } if(!c->running && /* read from a file */ c->newsession && /* clean session cookies */ !co->expires) { /* this is a session cookie since it doesn't expire! */ freecookie(co); return NULL; } co->livecookie = c->running; /* now, we have parsed the incoming line, we must now check if this superceeds an already existing cookie, which it may if the previous have the same domain and path as this */ /* at first, remove expired cookies */ if(!noexpire) remove_expired(c); #ifdef USE_LIBPSL /* Check if the domain is a Public Suffix and if yes, ignore the cookie. This needs a libpsl compiled with builtin data. 
*/ if(domain && co->domain && !isip(co->domain)) { psl = psl_builtin(); if(psl && !psl_is_cookie_domain_acceptable(psl, domain, co->domain)) { infof(data, "cookie '%s' dropped, domain '%s' must not set cookies for '%s'\n", co->name, domain, co->domain); freecookie(co); return NULL; } } #endif myhash = cookiehash(co->domain); clist = c->cookies[myhash]; replace_old = FALSE; while(clist) { if(strcasecompare(clist->name, co->name)) { /* the names are identical */ if(clist->domain && co->domain) { if(strcasecompare(clist->domain, co->domain) && (clist->tailmatch == co->tailmatch)) /* The domains are identical */ replace_old = TRUE; } else if(!clist->domain && !co->domain) replace_old = TRUE; if(replace_old) { /* the domains were identical */ if(clist->spath && co->spath) { if(strcasecompare(clist->spath, co->spath)) { replace_old = TRUE; } else replace_old = FALSE; } else if(!clist->spath && !co->spath) replace_old = TRUE; else replace_old = FALSE; } if(replace_old && !co->livecookie && clist->livecookie) { /* Both cookies matched fine, except that the already present cookie is "live", which means it was set from a header, while the new one isn't "live" and thus only read from a file. We let live cookies stay alive */ /* Free the newcomer and get out of here! 
*/ freecookie(co); return NULL; } if(replace_old) { co->next = clist->next; /* get the next-pointer first */ /* then free all the old pointers */ free(clist->name); free(clist->value); free(clist->domain); free(clist->path); free(clist->spath); free(clist->expirestr); free(clist->version); free(clist->maxage); *clist = *co; /* then store all the new data */ free(co); /* free the newly alloced memory */ co = clist; /* point to the previous struct instead */ /* We have replaced a cookie, now skip the rest of the list but make sure the 'lastc' pointer is properly set */ do { lastc = clist; clist = clist->next; } while(clist); break; } } lastc = clist; clist = clist->next; } if(c->running) /* Only show this when NOT reading the cookies from a file */ infof(data, "%s cookie %s=\"%s\" for domain %s, path %s, " "expire %" CURL_FORMAT_CURL_OFF_T "\n", replace_old?"Replaced":"Added", co->name, co->value, co->domain, co->path, co->expires); if(!replace_old) { /* then make the last item point on this new one */ if(lastc) lastc->next = co; else c->cookies[myhash] = co; c->numcookies++; /* one more cookie in the jar */ } return co; }
int main(int argc, const char *const *argv) { int mode = 1; const char *const *arg, *psl_file = NULL, *cookie_domain = NULL; psl_ctx_t *psl = (psl_ctx_t *) psl_builtin(); /* set current locale according to the environment variables */ #include <locale.h> setlocale(LC_ALL, ""); for (arg = argv + 1; arg < argv + argc; arg++) { if (!strncmp(*arg, "--", 2)) { if (!strcmp(*arg, "--is-public-suffix")) mode = 1; else if (!strcmp(*arg, "--print-unreg-domain")) mode = 2; else if (!strcmp(*arg, "--print-reg-domain")) mode = 3; else if (!strcmp(*arg, "--print-info")) mode = 99; else if (!strcmp(*arg, "--is-cookie-domain-acceptable") && arg < argv + argc - 1) { mode = 4; cookie_domain = *(++arg); } else if (!strcmp(*arg, "--use-builtin-data")) { psl_free(psl); if (psl_file) { fprintf(stderr, "Dropped data from %s\n", psl_file); psl_file = NULL; } if (!(psl = (psl_ctx_t *) psl_builtin())) printf("No builtin PSL data available\n"); } else if (!strcmp(*arg, "--load-psl-file") && arg < argv + argc - 1) { psl_free(psl); if (psl_file) { fprintf(stderr, "Dropped data from %s\n", psl_file); psl_file = NULL; } if (!(psl = psl_load_file(psl_file = *(++arg)))) { fprintf(stderr, "Failed to load PSL data from %s\n\n", psl_file); psl_file = NULL; } } else if (!strcmp(*arg, "--help")) { fprintf(stdout, "`psl' explores the Public Suffix List\n\n"); usage(0, stdout); } else if (!strcmp(*arg, "--version")) { printf("psl %s\n", PACKAGE_VERSION); printf("libpsl %s\n", psl_get_version()); printf("\n"); printf("Copyright (C) 2014-2015 Tim Ruehsen\n"); printf("License: MIT\n"); exit(0); } else if (!strcmp(*arg, "--")) { arg++; break; } else { fprintf(stderr, "Unknown option '%s'\n", *arg); usage(1, stderr); } } else break; } if (mode != 99) { if (!psl) { fprintf(stderr, "No PSL data available - aborting\n"); exit(2); } if (arg >= argv + argc) { char buf[256], *domain, *lower; size_t len; psl_error_t rc; /* read URLs from STDIN */ while (fgets(buf, sizeof(buf), stdin)) { for (domain = buf; 
isspace(*domain); domain++); /* skip leading spaces */ if (*domain == '#' || !*domain) continue; /* skip empty lines and comments */ for (len = strlen(domain); len && isspace(domain[len - 1]); len--); /* skip trailing spaces */ domain[len] = 0; if ((rc = psl_str_to_utf8lower(domain, NULL, NULL, &lower)) != PSL_SUCCESS) fprintf(stderr, "%s: Failed to convert to lowercase UTF-8 (%d)\n", domain, rc); else if (mode == 1) printf("%s: %d (%s)\n", domain, psl_is_public_suffix(psl, lower), lower); else if (mode == 2) printf("%s: %s\n", domain, psl_unregistrable_domain(psl, lower)); else if (mode == 3) printf("%s: %s\n", domain, psl_registrable_domain(psl, lower)); else if (mode == 4) { char *cookie_domain_lower; if ((rc = psl_str_to_utf8lower(domain, NULL, NULL, &cookie_domain_lower)) != PSL_SUCCESS) fprintf(stderr, "%s: Failed to convert cookie domain '%s' to lowercase UTF-8 (%d)\n", domain, cookie_domain, rc); else printf("%s: %d\n", domain, psl_is_cookie_domain_acceptable(psl, lower, cookie_domain)); free(cookie_domain_lower); } free(lower); } psl_free(psl); exit(0); } } if (mode == 1) { for (; arg < argv + argc; arg++) printf("%s: %d\n", *arg, psl_is_public_suffix(psl, *arg)); } else if (mode == 2) { for (; arg < argv + argc; arg++) printf("%s: %s\n", *arg, psl_unregistrable_domain(psl, *arg)); } else if (mode == 3) { for (; arg < argv + argc; arg++) printf("%s: %s\n", *arg, psl_registrable_domain(psl, *arg)); } else if (mode == 4) { for (; arg < argv + argc; arg++) printf("%s: %d\n", *arg, psl_is_cookie_domain_acceptable(psl, *arg, cookie_domain)); } else if (mode == 99) { if (psl && psl != psl_builtin()) { printf("suffixes: %d\n", psl_suffix_count(psl)); printf("exceptions: %d\n", psl_suffix_exception_count(psl)); printf("wildcards: %d\n", psl_suffix_wildcard_count(psl)); } psl_free(psl); psl = (psl_ctx_t *) psl_builtin(); if (psl) { printf("builtin suffixes: %d\n", psl_suffix_count(psl)); printf("builtin exceptions: %d\n", psl_suffix_exception_count(psl)); 
printf("builtin wildcards: %d\n", psl_suffix_wildcard_count(psl)); printf("builtin filename: %s\n", psl_builtin_filename()); printf("builtin compile time: %ld (%s)\n", psl_builtin_compile_time(), time2str(psl_builtin_compile_time())); printf("builtin file time: %ld (%s)\n", psl_builtin_file_time(), time2str(psl_builtin_file_time())); printf("builtin SHA1 file hash: %s\n", psl_builtin_sha1sum()); } else printf("No builtin PSL data available\n"); } psl_free(psl); return 0; }
static void test_psl(void) { /* punycode generation: idn ?? */ /* octal code generation: echo -n "??" | od -b */ static const struct test_data { const char *domain; int result; } test_data[] = { { "www.example.com", 0 }, { "com.ar", 1 }, { "www.com.ar", 0 }, { "cc.ar.us", 1 }, { ".cc.ar.us", 1 }, { "www.cc.ar.us", 0 }, { "www.ck", 0 }, /* exception from *.ck */ { "abc.www.ck", 0 }, { "xxx.ck", 1 }, { "www.xxx.ck", 0 }, { "\345\225\206\346\240\207", 1 }, /* xn--czr694b oder ?? */ { "www.\345\225\206\346\240\207", 0 }, { "xn--czr694b", 1 }, { "www.xn--czr694b", 0 }, /* some special test follow ('name' and 'forgot.his.name' are public, but e.g. his.name is not) */ { "name", 1 }, { ".name", 1 }, { "his.name", 0 }, { ".his.name", 0 }, { "forgot.his.name", 1 }, { ".forgot.his.name", 1 }, { "whoever.his.name", 0 }, { "whoever.forgot.his.name", 0 }, { ".", 1 }, /* special case */ { "", 1 }, /* special case */ { NULL, 1 }, /* special case */ { "adfhoweirh", 1 }, /* unknown TLD */ }; unsigned it; const psl_ctx_t *psl; psl = psl_builtin(); printf("have %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl)); for (it = 0; it < countof(test_data); it++) { const struct test_data *t = &test_data[it]; int result = psl_is_public_suffix(psl, t->domain); if (result == t->result) { ok++; } else { failed++; printf("psl_is_public_suffix(%s)=%d (expected %d)\n", t->domain, result, t->result); } } printf("psl_builtin_compile_time()=%ld\n", psl_builtin_compile_time()); psl_builtin_compile_time() == 0 ? failed++ : ok++; printf("psl_builtin_file_time()=%ld\n", psl_builtin_file_time()); psl_builtin_file_time() == 0 ? failed++ : ok++; printf("psl_builtin_sha1sum()=%s\n", psl_builtin_sha1sum()); *psl_builtin_sha1sum() == 0 ? failed++ : ok++; }
static void test_psl(void) { FILE *fp; const psl_ctx_t *psl; const char *p; char buf[256], domain[128], expected_regdom[128], semicolon[2]; char lbuf[258]; int er_is_null, d_is_null; unsigned it; psl = psl_builtin(); printf("have %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl)); /* special check with NULL values */ test(NULL, NULL, NULL); /* special check with NULL psl context */ test(NULL, "www.example.com", NULL); /* special check with NULL psl context and TLD */ test(NULL, "com", NULL); /* Norwegian with uppercase oe */ #ifdef WITH_LIBICU test(psl, "www.\303\230yer.no", "www.\303\270yer.no"); #endif /* Norwegian with lowercase oe */ test(psl, "www.\303\270yer.no", "www.\303\270yer.no"); /* Norwegian with lowercase oe, encoded as ISO-8859-15 */ test_iso(psl, "www.\370yer.no", "www.\303\270yer.no"); /* Testing special code paths of psl_str_to_utf8lower() */ for (it = 254; it <= 257; it++) { memset(lbuf, 'a', it); lbuf[it] = 0; lbuf[0] = '\370'; test_iso(psl, lbuf, NULL); lbuf[0] = '\303'; lbuf[1] = '\270'; test(psl, lbuf, NULL); } /* special check with NULL psl context and TLD */ test(psl, "whoever.forgot.his.name", "whoever.forgot.his.name"); /* special check with NULL psl context and TLD */ test(psl, "forgot.his.name", NULL); /* special check with NULL psl context and TLD */ test(psl, "his.name", "his.name"); if ((fp = fopen(PSL_TESTFILE, "r"))) { while ((fgets(buf, sizeof(buf), fp))) { /* advance over ASCII white space */ for (p = buf; *p == ' ' || *p == '\t' || *p == '\r' || *p == '\n'; p++) ; if (!*p || (*p == '/' && p[1] == '/')) continue; /* ignore comments and blank lines */ er_is_null = 0; d_is_null = 0; if (sscanf(p, "checkPublicSuffix ( '%127[^']' , '%127[^']' ) %1[;]", domain, expected_regdom, semicolon) != 3) { if (sscanf(p, "checkPublicSuffix ( '%127[^']' , null ) %1[;]", domain, semicolon) == 2) { er_is_null = 1; } else if (sscanf(p, "checkPublicSuffix ( null , '%127[^']' ) %1[;]", expected_regdom, semicolon) 
== 2) { d_is_null = 1; } else if (sscanf(p, "checkPublicSuffix ( null , null ) %1[;]", semicolon) == 1) { d_is_null = 1; er_is_null = 1; } else if (sscanf(p, "%127s %127s", domain, expected_regdom) == 2) { if (!strcmp(domain, "null")) d_is_null = 1; if (!strcmp(expected_regdom, "null")) er_is_null = 1; } else { failed++; printf("Malformed line from '" PSL_TESTFILE "': %s", buf); continue; } } test(psl, d_is_null ? NULL : domain, er_is_null ? NULL : expected_regdom); } fclose(fp); } else { printf("Failed to open %s\n", PSL_TESTFILE); failed++; } }
static void test_psl(void) { /* punycode generation: idn ?? */ /* octal code generation: echo -n "??" | od -b */ static const struct test_data { const char *domain; int result; } test_data[] = { { "www.example.com", 0 }, { "com.ar", 1 }, { "www.com.ar", 0 }, { "cc.ar.us", 1 }, { ".cc.ar.us", 1 }, { "www.cc.ar.us", 0 }, { "www.ck", 0 }, /* exception from *.ck */ { "abc.www.ck", 0 }, { "xxx.ck", 1 }, { "www.xxx.ck", 0 }, { "\345\225\206\346\240\207", 1 }, /* xn--czr694b or ?? */ { "www.\345\225\206\346\240\207", 0 }, /* some special test follow ('name' and 'forgot.his.name' are public, but e.g. his.name is not) */ { "name", 1 }, { ".name", 1 }, { "his.name", 0 }, { ".his.name", 0 }, { "forgot.his.name", 1 }, { ".forgot.his.name", 1 }, { "whoever.his.name", 0 }, { "whoever.forgot.his.name", 0 }, { ".", 1 }, /* special case */ { "", 1 }, /* special case */ { NULL, 1 }, /* special case */ { "adfhoweirh", 1 }, /* unknown TLD */ }; unsigned it; int result, ver; psl_ctx_t *psl; psl = psl_load_file(PSL_FILE); printf("loaded %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl)); for (it = 0; it < countof(test_data); it++) { const struct test_data *t = &test_data[it]; result = psl_is_public_suffix(psl, t->domain); if (result == t->result) { ok++; } else { failed++; printf("psl_is_public_suffix(%s)=%d (expected %d)\n", t->domain, result, t->result); } } /* do some checks to cover more code paths in libpsl */ psl_is_public_suffix(NULL, "xxx"); if ((ver = psl_check_version_number(0)) == 0) { printf("psl_check_version_number(0) is 0\n"); failed++; } else { if (((result = psl_check_version_number(ver)) != ver)) { printf("psl_check_version_number(%06X) is %06X\n", ver, result); failed++; } if (((result = psl_check_version_number(ver - 1)) != 0)) { printf("psl_check_version_number(%06X) is %06X\n", ver - 1, result); failed++; } if (((result = psl_check_version_number(ver + 1)) != ver)) { printf("psl_check_version_number(%06X) is %06X\n", ver, 
result); failed++; } } psl_str_to_utf8lower("www.example.com", "utf-8", "en", NULL); psl_str_to_utf8lower(NULL, "utf-8", "en", NULL); { char *lower = NULL; psl_str_to_utf8lower("www.example.com", NULL, "de", &lower); free(lower); lower = NULL; psl_str_to_utf8lower("\374bel.de", NULL, "de", &lower); free(lower); lower = NULL; psl_str_to_utf8lower("\374bel.de", "iso-8859-1", NULL, &lower); free(lower); lower = NULL; psl_str_to_utf8lower(NULL, "utf-8", "en", &lower); free(lower); lower = NULL; } psl_get_version(); psl_dist_filename(); psl_builtin_filename(); psl_builtin_outdated(); psl_builtin_file_time(); psl_builtin_sha1sum(); psl_suffix_wildcard_count(NULL); psl_suffix_wildcard_count(psl); psl_suffix_wildcard_count(psl_builtin()); psl_suffix_count(NULL); psl_suffix_exception_count(NULL); psl_load_file(NULL); psl_load_fp(NULL); psl_registrable_domain(NULL, ""); psl_registrable_domain(psl, NULL); psl_registrable_domain(psl, "www.example.com"); psl_unregistrable_domain(NULL, ""); psl_unregistrable_domain(psl, NULL); psl_is_public_suffix2(NULL, "", PSL_TYPE_ANY); psl_is_public_suffix2(psl, NULL, PSL_TYPE_ANY); psl_free(psl); }