/*
 * Runs the registrable-domain checks against the builtin PSL data.
 *
 * A handful of fixed cases is passed to test() first, then every
 * "checkPublicSuffix('<domain>', '<expected>')" / "checkPublicSuffix('<domain>', null)"
 * line found in PSL_TESTFILE is forwarded to test() as well.  Lines that
 * match neither pattern are ignored.  If the test file cannot be opened the
 * global 'failed' counter is incremented.
 */
static void test_psl(void)
{
	const psl_ctx_t *ctx = psl_builtin();
	char line[256], name[128], want[128];
	FILE *in;

	printf("have %d suffixes and %d exceptions\n", psl_suffix_count(ctx), psl_suffix_exception_count(ctx));

	/* no context, no domain, nothing expected */
	test(NULL, NULL, NULL);

	/* no context, ordinary host name */
	test(NULL, "www.example.com", NULL);

	/* no context, bare TLD */
	test(NULL, "com", NULL);

#ifdef WITH_LIBICU
	/* Norwegian, uppercase o-slash in the input, lowercase expected */
	test(ctx, "www.\303\230yer.no", "www.\303\270yer.no");
#endif

	/* Norwegian, lowercase o-slash */
	test(ctx, "www.\303\270yer.no", "www.\303\270yer.no");

	/* the *.name cases, this time with the builtin context */
	test(ctx, "whoever.forgot.his.name", "whoever.forgot.his.name");
	test(ctx, "forgot.his.name", NULL);
	test(ctx, "his.name", "his.name");

	in = fopen(PSL_TESTFILE, "r");
	if (in == NULL) {
		printf("Failed to open %s\n", PSL_TESTFILE);
		failed++;
		return;
	}

	while (fgets(line, sizeof(line), in) != NULL) {
		/* first try the form with two quoted arguments ... */
		int quoted = sscanf(line, " checkPublicSuffix('%127[^']' , '%127[^']", name, want) == 2;

		/* ... then the form whose second argument is the bare word null */
		if (!quoted && sscanf(line, " checkPublicSuffix('%127[^']' , %127[nul]", name, want) != 2)
			continue;

		/* the literal text "null" stands for "no result expected" */
		test(ctx, name, strcmp(want, "null") == 0 ? NULL : want);
	}

	fclose(in);
}
Beispiel #2
0
/*
 * Initializes a cookie database.
 *
 * cookie_db: storage to initialize, or NULL to have it allocated with
 *            xmalloc().
 *
 * The structure is zeroed, then given a cookie vector (created with
 * _compare_cookie as comparator and mget_cookie_deinit as element
 * destructor) and an initialized mutex.  When built with WITH_LIBPSL the
 * builtin PSL context is stored as well.
 *
 * Returns the initialized database (the passed-in pointer, or the newly
 * allocated one).
 */
mget_cookie_db_t *mget_cookie_db_init(mget_cookie_db_t *cookie_db)
{
	mget_cookie_db_t *db = cookie_db;

	if (db == NULL)
		db = xmalloc(sizeof(mget_cookie_db_t));

	/* start from a clean slate, whether caller-provided or fresh */
	memset(db, 0, sizeof(*db));

	db->cookies = mget_vector_create(32, -2, (int(*)(const void *, const void *))_compare_cookie);
	mget_vector_set_destructor(db->cookies, (void(*)(void *))mget_cookie_deinit);

	mget_thread_mutex_init(&db->mutex);

#ifdef WITH_LIBPSL
	db->psl = (psl_ctx_t *)psl_builtin();
#endif

	return db;
}
Beispiel #3
0
/*
 * Feeds every suffix rule found in PSL_FILE to test_psl_entry() for each
 * available PSL context: the one loaded from PSL_FILE, the builtin one, the
 * ones loaded from PSL_DAFSA and PSL_ASCII_DAFSA, and the one returned by
 * psl_latest("psl.dafsa").
 *
 * While reading, the "===BEGIN/END ICANN|PRIVATE DOMAINS===" comment markers
 * are tracked so that each rule is tested with the section type it belongs
 * to (0 outside of any section).  Load/open failures bump the global
 * 'failed' counter.
 */
static void test_psl(void)
{
	psl_ctx_t *loaded, *dafsa, *ascii_dafsa, *latest;
	const psl_ctx_t *builtin;
	FILE *in;
	char line[256], *cur, *rule;
	int section = 0;

	loaded = psl_load_file(PSL_FILE); /* PSL_FILE can be set by ./configure --with-psl-file=[PATH] */
	printf("loaded %d suffixes and %d exceptions\n", psl_suffix_count(loaded), psl_suffix_exception_count(loaded));

	builtin = psl_builtin();
	printf("builtin PSL has %d suffixes and %d exceptions\n", psl_suffix_count(builtin), psl_suffix_exception_count(builtin));

	dafsa = psl_load_file(PSL_DAFSA);
	if (dafsa == NULL) {
		fprintf(stderr, "Failed to load 'psl.dafsa'\n");
		failed++;
	}

	ascii_dafsa = psl_load_file(PSL_ASCII_DAFSA);
	if (ascii_dafsa == NULL) {
		fprintf(stderr, "Failed to load 'psl_ascii.dafsa'\n");
		failed++;
	}

	latest = psl_latest("psl.dafsa");

	in = fopen(PSL_FILE, "r");
	if (in == NULL) {
		printf("Failed to open %s\n", PSL_FILE);
		failed++;
	} else {
#ifdef HAVE_CLOCK_GETTIME
		clock_gettime(CLOCK_REALTIME, &ts1);
#endif

		while ((cur = fgets(line, sizeof(line), in)) != NULL) {
			/* step over leading whitespace; nothing left means an empty line */
			while (_isspace_ascii(*cur))
				cur++;
			if (*cur == '\0')
				continue;

			if (cur[0] == '/' && cur[1] == '/') {
				/* comment line: only the section markers matter */
				char *text = cur + 2;

				if (section == 0) {
					if (strstr(text, "===BEGIN ICANN DOMAINS==="))
						section = PSL_TYPE_ICANN;
					else if (strstr(text, "===BEGIN PRIVATE DOMAINS==="))
						section = PSL_TYPE_PRIVATE;
				} else if (section == PSL_TYPE_ICANN && strstr(text, "===END ICANN DOMAINS===")) {
					section = 0;
				} else if (section == PSL_TYPE_PRIVATE && strstr(text, "===END PRIVATE DOMAINS===")) {
					section = 0;
				}

				continue;
			}

			/* the rule is the first whitespace-delimited token; terminate it in place */
			rule = cur;
			while (*cur != '\0' && !_isspace_ascii(*cur))
				cur++;
			*cur = '\0';

			/* the file-loaded context is tested unconditionally, the others only if present */
			test_psl_entry(loaded, rule, section);

			if (builtin)
				test_psl_entry(builtin, rule, section);

			if (dafsa)
				test_psl_entry(dafsa, rule, section);

			if (ascii_dafsa)
				test_psl_entry(ascii_dafsa, rule, section);

			if (latest)
				test_psl_entry(latest, rule, section);
		}

#ifdef HAVE_CLOCK_GETTIME
		clock_gettime(CLOCK_REALTIME, &ts2);
#endif
		fclose(in);
	}

	psl_free(latest);
	psl_free(ascii_dafsa);
	psl_free(dafsa);
	psl_free((psl_ctx_t *)builtin);
	psl_free(loaded);
}
Beispiel #4
0
/*
 * Curl_cookie_add()
 *
 * Parses one cookie line and adds the resulting cookie to the jar 'c'.
 *
 * The line is either a HTTP header style line ('httpheader' TRUE: a
 * "name=value; attr=value; ..." list) or a line in the netscape cookie file
 * format (TAB separated fields, optionally prefixed with "#HttpOnly_").
 * Note that 'lineptr' is modified in place when parsing the file format.
 *
 * If a cookie with the same name, domain (and tailmatch flag) and sanitized
 * path already exists in the jar, its contents are replaced by the new
 * cookie; otherwise the new cookie is appended to its hash bucket. A cookie
 * read from a file never replaces a "live" one that was set from a header.
 *
 * Returns a pointer to the cookie now stored in the jar, or NULL if the line
 * was rejected (too long, malformed, comment line, bad domain, refused by
 * the PSL check, session cookie while cleaning session cookies, shadowed by
 * a live cookie) or if memory ran out.
 */
struct Cookie *
Curl_cookie_add(struct Curl_easy *data,
                /* The 'data' pointer here may be NULL at times, and thus
                   must only be used very carefully for things that can deal
                   with data being NULL. Such as infof() and similar */

                struct CookieInfo *c,
                bool httpheader, /* TRUE if HTTP header-style line */
                bool noexpire, /* if TRUE, skip remove_expired() */
                char *lineptr,   /* first character of the line */
                const char *domain, /* default domain */
                const char *path)   /* full path used when this cookie is set,
                                       used to get default path for the cookie
                                       unless set */
{
  struct Cookie *clist;
  struct Cookie *co;
  struct Cookie *lastc = NULL;
  time_t now = time(NULL);
  bool replace_old = FALSE;
  bool badcookie = FALSE; /* cookies are good by default. mmmmm yummy */
  size_t myhash;

#ifdef USE_LIBPSL
  const psl_ctx_t *psl;
#endif

#ifdef CURL_DISABLE_VERBOSE_STRINGS
  (void)data;
#endif

  /* First, alloc and init a new struct for it */
  co = calloc(1, sizeof(struct Cookie));
  if(!co)
    return NULL; /* bail out if we're this low on memory */

  if(httpheader) {
    /* This line was read off a HTTP-header */
    char name[MAX_NAME];
    char what[MAX_NAME];
    const char *ptr;
    const char *semiptr;

    size_t linelength = strlen(lineptr);
    if(linelength > MAX_COOKIE_LINE) {
      /* discard overly long lines at once */
      free(co);
      return NULL;
    }

    semiptr = strchr(lineptr, ';'); /* first, find a semicolon */

    while(*lineptr && ISBLANK(*lineptr))
      lineptr++;

    ptr = lineptr;
    do {
      /* we have a <what>=<this> pair or a stand-alone word here */
      name[0] = what[0] = 0; /* init the buffers */
      if(1 <= sscanf(ptr, "%" MAX_NAME_TXT "[^;\r\n=] =%"
                     MAX_NAME_TXT "[^;\r\n]",
                     name, what)) {
        /* Use strstore() below to properly deal with received cookie
           headers that have the same string property set more than once,
           and then we use the last one. */
        const char *whatptr;
        bool done = FALSE;
        bool sep;
        size_t len = strlen(what);
        size_t nlen = strlen(name);
        const char *endofn = &ptr[ nlen ];

        if(nlen >= (MAX_NAME-1) || len >= (MAX_NAME-1) ||
           ((nlen + len) > MAX_NAME)) {
          /* too long individual name or contents, or too long combination of
             name + contents. Chrome and Firefox support 4095 or 4096 bytes
             combo. */
          freecookie(co);
          /* nlen and len are size_t, so they must be printed with %zu */
          infof(data, "oversized cookie dropped, name/val %zu + %zu bytes\n",
                nlen, len);
          return NULL;
        }

        /* name ends with a '=' ? */
        sep = (*endofn == '=')?TRUE:FALSE;

        if(nlen) {
          endofn--; /* move to the last character */
          if(ISBLANK(*endofn)) {
            /* skip trailing spaces in name */
            while(*endofn && ISBLANK(*endofn) && nlen) {
              endofn--;
              nlen--;
            }
            name[nlen] = 0; /* new end of name */
          }
        }

        /* Strip off trailing whitespace from the 'what' */
        while(len && ISBLANK(what[len-1])) {
          what[len-1] = 0;
          len--;
        }

        /* Skip leading whitespace from the 'what' */
        whatptr = what;
        while(*whatptr && ISBLANK(*whatptr))
          whatptr++;

        if(!co->name && sep) {
          /* The very first name/value pair is the actual cookie name */
          co->name = strdup(name);
          co->value = strdup(whatptr);
          if(!co->name || !co->value) {
            badcookie = TRUE;
            break;
          }
        }
        else if(!len) {
          /* this was a "<name>=" with no content, and we must allow
             'secure' and 'httponly' specified this weirdly */
          done = TRUE;
          if(strcasecompare("secure", name))
            co->secure = TRUE;
          else if(strcasecompare("httponly", name))
            co->httponly = TRUE;
          else if(sep)
            /* there was a '=' so we're not done parsing this field */
            done = FALSE;
        }
        if(done)
          ;
        else if(strcasecompare("path", name)) {
          strstore(&co->path, whatptr);
          if(!co->path) {
            badcookie = TRUE; /* out of memory bad */
            break;
          }
          free(co->spath); /* if this is set again */
          co->spath = sanitize_cookie_path(co->path);
          if(!co->spath) {
            badcookie = TRUE; /* out of memory bad */
            break;
          }
        }
        else if(strcasecompare("domain", name)) {
          bool is_ip;

          /* Now, we make sure that our host is within the given domain,
             or the given domain is not valid and thus cannot be set. */

          if('.' == whatptr[0])
            whatptr++; /* ignore preceding dot */

#ifndef USE_LIBPSL
          /*
           * Without PSL we don't know when the incoming cookie is set on a
           * TLD or otherwise "protected" suffix. To reduce risk, we require a
           * dot OR the exact host name being "localhost".
           */
          {
            const char *dotp;
            /* check for more dots */
            dotp = strchr(whatptr, '.');
            if(!dotp && !strcasecompare("localhost", whatptr))
              domain = ":";
          }
#endif

          is_ip = isip(domain ? domain : whatptr);

          if(!domain
             || (is_ip && !strcmp(whatptr, domain))
             || (!is_ip && tailmatch(whatptr, domain))) {
            strstore(&co->domain, whatptr);
            if(!co->domain) {
              badcookie = TRUE;
              break;
            }
            if(!is_ip)
              co->tailmatch = TRUE; /* we always do that if the domain name was
                                       given */
          }
          else {
            /* we did not get a tailmatch and then the attempted set domain
               is not a domain to which the current host belongs. Mark as
               bad. */
            badcookie = TRUE;
            infof(data, "skipped cookie with bad tailmatch domain: %s\n",
                  whatptr);
          }
        }
        else if(strcasecompare("version", name)) {
          strstore(&co->version, whatptr);
          if(!co->version) {
            badcookie = TRUE;
            break;
          }
        }
        else if(strcasecompare("max-age", name)) {
          /* Defined in RFC2109:

             Optional.  The Max-Age attribute defines the lifetime of the
             cookie, in seconds.  The delta-seconds value is a decimal non-
             negative integer.  After delta-seconds seconds elapse, the
             client should discard the cookie.  A value of zero means the
             cookie should be discarded immediately.

          */
          strstore(&co->maxage, whatptr);
          if(!co->maxage) {
            badcookie = TRUE;
            break;
          }
        }
        else if(strcasecompare("expires", name)) {
          strstore(&co->expirestr, whatptr);
          if(!co->expirestr) {
            badcookie = TRUE;
            break;
          }
        }
        /*
          else this is the second (or more) name we don't know
          about! */
      }
      else {
        /* this is an "illegal" <what>=<this> pair */
      }

      if(!semiptr || !*semiptr) {
        /* we already know there are no more cookies */
        semiptr = NULL;
        continue;
      }

      ptr = semiptr + 1;
      while(*ptr && ISBLANK(*ptr))
        ptr++;
      semiptr = strchr(ptr, ';'); /* now, find the next semicolon */

      if(!semiptr && *ptr)
        /* There are no more semicolons, but there's a final name=value pair
           coming up */
        semiptr = strchr(ptr, '\0');
    } while(semiptr);

    if(co->maxage) {
      CURLofft offt;
      offt = curlx_strtoofft((*co->maxage == '\"')?
                             &co->maxage[1]:&co->maxage[0], NULL, 10,
                             &co->expires);
      if(offt == CURL_OFFT_FLOW)
        /* overflow, used max value */
        co->expires = CURL_OFF_T_MAX;
      else if(!offt) {
        if(CURL_OFF_T_MAX - now < co->expires)
          /* would overflow */
          co->expires = CURL_OFF_T_MAX;
        else
          co->expires += now;
      }
    }
    else if(co->expirestr) {
      /* Note that if the date couldn't get parsed for whatever reason,
         the cookie will be treated as a session cookie */
      co->expires = curl_getdate(co->expirestr, NULL);

      /* Session cookies have expires set to 0 so if we get that back
         from the date parser let's add a second to make it a
         non-session cookie */
      if(co->expires == 0)
        co->expires = 1;
      else if(co->expires < 0)
        co->expires = 0;
    }

    if(!badcookie && !co->domain) {
      if(domain) {
        /* no domain was given in the header line, set the default */
        co->domain = strdup(domain);
        if(!co->domain)
          badcookie = TRUE;
      }
    }

    if(!badcookie && !co->path && path) {
      /* No path was given in the header line, set the default.
         Note that the passed-in path to this function MAY have a '?' and
         following part that MUST not be stored as part of the path. */
      char *queryp = strchr(path, '?');

      /* queryp is where the interesting part of the path ends, so now we
         want to the find the last */
      char *endslash;
      if(!queryp)
        endslash = strrchr(path, '/');
      else
        endslash = memrchr(path, '/', (size_t)(queryp - path));
      if(endslash) {
        size_t pathlen = (size_t)(endslash-path + 1); /* include end slash */
        co->path = malloc(pathlen + 1); /* one extra for the zero byte */
        if(co->path) {
          memcpy(co->path, path, pathlen);
          co->path[pathlen] = 0; /* zero terminate */
          co->spath = sanitize_cookie_path(co->path);
          if(!co->spath)
            badcookie = TRUE; /* out of memory bad */
        }
        else
          badcookie = TRUE;
      }
    }

    if(badcookie || !co->name) {
      /* we didn't get a cookie name or a bad one,
         this is an illegal line, bail out */
      freecookie(co);
      return NULL;
    }

  }
  else {
    /* This line is NOT a HTTP header style line, we do offer support for
       reading the odd netscape cookies-file format here */
    char *ptr;
    char *firstptr;
    char *tok_buf = NULL;
    int fields;

    /* IE introduced HTTP-only cookies to prevent XSS attacks. Cookies
       marked with httpOnly after the domain name are not accessible
       from javascripts, but since curl does not operate at javascript
       level, we include them anyway. In Firefox's cookie files, these
       lines are preceded with #HttpOnly_ and then everything is
       as usual, so we skip 10 characters of the line..
    */
    if(strncmp(lineptr, "#HttpOnly_", 10) == 0) {
      lineptr += 10;
      co->httponly = TRUE;
    }

    if(lineptr[0]=='#') {
      /* don't even try the comments */
      free(co);
      return NULL;
    }
    /* strip off the possible end-of-line characters */
    ptr = strchr(lineptr, '\r');
    if(ptr)
      *ptr = 0; /* clear it */
    ptr = strchr(lineptr, '\n');
    if(ptr)
      *ptr = 0; /* clear it */

    firstptr = strtok_r(lineptr, "\t", &tok_buf); /* tokenize it on the TAB */

    /* Now loop through the fields and init the struct we already have
       allocated */
    for(ptr = firstptr, fields = 0; ptr && !badcookie;
        ptr = strtok_r(NULL, "\t", &tok_buf), fields++) {
      switch(fields) {
      case 0:
        if(ptr[0]=='.') /* skip preceding dots */
          ptr++;
        co->domain = strdup(ptr);
        if(!co->domain)
          badcookie = TRUE;
        break;
      case 1:
        /* This field got its explanation on the 23rd of May 2001 by
           Andrés García:

           flag: A TRUE/FALSE value indicating if all machines within a given
           domain can access the variable. This value is set automatically by
           the browser, depending on the value you set for the domain.

           As far as I can see, it is set to true when the cookie says
           .domain.com and to false when the domain is complete www.domain.com
        */
        co->tailmatch = strcasecompare(ptr, "TRUE")?TRUE:FALSE;
        break;
      case 2:
        /* It turns out, that sometimes the file format allows the path
           field to remain not filled in, we try to detect this and work
           around it! Andrés García made us aware of this... */
        if(strcmp("TRUE", ptr) && strcmp("FALSE", ptr)) {
          /* only if the path doesn't look like a boolean option! */
          co->path = strdup(ptr);
          if(!co->path)
            badcookie = TRUE;
          else {
            co->spath = sanitize_cookie_path(co->path);
            if(!co->spath) {
              badcookie = TRUE; /* out of memory bad */
            }
          }
          break;
        }
        /* this doesn't look like a path, make one up! */
        co->path = strdup("/");
        if(!co->path)
          badcookie = TRUE;
        co->spath = strdup("/");
        if(!co->spath)
          badcookie = TRUE;
        fields++; /* add a field and fall down to secure */
        /* FALLTHROUGH */
      case 3:
        co->secure = strcasecompare(ptr, "TRUE")?TRUE:FALSE;
        break;
      case 4:
        if(curlx_strtoofft(ptr, NULL, 10, &co->expires))
          badcookie = TRUE;
        break;
      case 5:
        co->name = strdup(ptr);
        if(!co->name)
          badcookie = TRUE;
        break;
      case 6:
        co->value = strdup(ptr);
        if(!co->value)
          badcookie = TRUE;
        break;
      }
    }
    if(6 == fields) {
      /* we got a cookie with blank contents, fix it */
      co->value = strdup("");
      if(!co->value)
        badcookie = TRUE;
      else
        fields++;
    }

    if(!badcookie && (7 != fields))
      /* we did not find the sufficient number of fields */
      badcookie = TRUE;

    if(badcookie) {
      freecookie(co);
      return NULL;
    }

  }

  if(!c->running &&    /* read from a file */
     c->newsession &&  /* clean session cookies */
     !co->expires) {   /* this is a session cookie since it doesn't expire! */
    freecookie(co);
    return NULL;
  }

  co->livecookie = c->running;

  /* now, we have parsed the incoming line, we must now check if this
     supersedes an already existing cookie, which it may if the previous have
     the same domain and path as this */

  /* at first, remove expired cookies */
  if(!noexpire)
    remove_expired(c);

#ifdef USE_LIBPSL
  /* Check if the domain is a Public Suffix and if yes, ignore the cookie.
     This needs a libpsl compiled with builtin data. */
  if(domain && co->domain && !isip(co->domain)) {
    psl = psl_builtin();
    if(psl && !psl_is_cookie_domain_acceptable(psl, domain, co->domain)) {
      infof(data,
            "cookie '%s' dropped, domain '%s' must not set cookies for '%s'\n",
            co->name, domain, co->domain);
      freecookie(co);
      return NULL;
    }
  }
#endif

  myhash = cookiehash(co->domain);
  clist = c->cookies[myhash];
  replace_old = FALSE;
  while(clist) {
    if(strcasecompare(clist->name, co->name)) {
      /* the names are identical */

      if(clist->domain && co->domain) {
        if(strcasecompare(clist->domain, co->domain) &&
          (clist->tailmatch == co->tailmatch))
          /* The domains are identical */
          replace_old = TRUE;
      }
      else if(!clist->domain && !co->domain)
        replace_old = TRUE;

      if(replace_old) {
        /* the domains were identical */

        if(clist->spath && co->spath) {
          if(strcasecompare(clist->spath, co->spath)) {
            replace_old = TRUE;
          }
          else
            replace_old = FALSE;
        }
        else if(!clist->spath && !co->spath)
          replace_old = TRUE;
        else
          replace_old = FALSE;

      }

      if(replace_old && !co->livecookie && clist->livecookie) {
        /* Both cookies matched fine, except that the already present
           cookie is "live", which means it was set from a header, while
           the new one isn't "live" and thus only read from a file. We let
           live cookies stay alive */

        /* Free the newcomer and get out of here! */
        freecookie(co);
        return NULL;
      }

      if(replace_old) {
        co->next = clist->next; /* get the next-pointer first */

        /* then free all the old pointers */
        free(clist->name);
        free(clist->value);
        free(clist->domain);
        free(clist->path);
        free(clist->spath);
        free(clist->expirestr);
        free(clist->version);
        free(clist->maxage);

        *clist = *co;  /* then store all the new data */

        free(co);   /* free the newly alloced memory */
        co = clist; /* point to the previous struct instead */

        /* We have replaced a cookie, now skip the rest of the list but
           make sure the 'lastc' pointer is properly set */
        do {
          lastc = clist;
          clist = clist->next;
        } while(clist);
        break;
      }
    }
    lastc = clist;
    clist = clist->next;
  }

  if(c->running)
    /* Only show this when NOT reading the cookies from a file */
    infof(data, "%s cookie %s=\"%s\" for domain %s, path %s, "
          "expire %" CURL_FORMAT_CURL_OFF_T "\n",
          replace_old?"Replaced":"Added", co->name, co->value,
          co->domain, co->path, co->expires);

  if(!replace_old) {
    /* then make the last item point on this new one */
    if(lastc)
      lastc->next = co;
    else
      c->cookies[myhash] = co;
    c->numcookies++; /* one more cookie in the jar */
  }

  return co;
}
Beispiel #5
0
/*
 * Entry point of the 'psl' command line tool.
 *
 * Options (all start with "--") select the operating mode and the PSL data:
 *   --is-public-suffix (default), --print-unreg-domain, --print-reg-domain,
 *   --is-cookie-domain-acceptable <cookie-domain>, --print-info,
 *   --use-builtin-data, --load-psl-file <file>, --help, --version.
 * A lone "--" ends option parsing.
 *
 * The remaining arguments are the domains to check.  If there are none (and
 * the mode is not --print-info), domains are read line by line from stdin;
 * there each line is trimmed, '#' comment lines and empty lines are skipped,
 * and the domain is converted to lowercase UTF-8 before being checked.
 *
 * Returns 0 on success; exits with status 2 if no PSL data is available.
 */
int main(int argc, const char *const *argv)
{
	int mode = 1;
	const char *const *arg, *psl_file = NULL, *cookie_domain = NULL;
	psl_ctx_t *psl = (psl_ctx_t *) psl_builtin();

	/* set current locale according to the environment variables */
	/* NOTE(review): including a header inside a function body is fragile;
	 * this should move to the file's top-level include block. */
	#include <locale.h>
	setlocale(LC_ALL, "");

	for (arg = argv + 1; arg < argv + argc; arg++) {
		if (!strncmp(*arg, "--", 2)) {
			if (!strcmp(*arg, "--is-public-suffix"))
				mode = 1;
			else if (!strcmp(*arg, "--print-unreg-domain"))
				mode = 2;
			else if (!strcmp(*arg, "--print-reg-domain"))
				mode = 3;
			else if (!strcmp(*arg, "--print-info"))
				mode = 99;
			else if (!strcmp(*arg, "--is-cookie-domain-acceptable") && arg < argv + argc - 1) {
				mode = 4;
				cookie_domain = *(++arg);
			}
			else if (!strcmp(*arg, "--use-builtin-data")) {
				psl_free(psl);
				if (psl_file) {
					fprintf(stderr, "Dropped data from %s\n", psl_file);
					psl_file = NULL;
				}
				if (!(psl = (psl_ctx_t *) psl_builtin()))
					printf("No builtin PSL data available\n");
			}
			else if (!strcmp(*arg, "--load-psl-file") && arg < argv + argc - 1) {
				psl_free(psl);
				if (psl_file) {
					fprintf(stderr, "Dropped data from %s\n", psl_file);
					psl_file = NULL;
				}
				if (!(psl = psl_load_file(psl_file = *(++arg)))) {
					fprintf(stderr, "Failed to load PSL data from %s\n\n", psl_file);
					psl_file = NULL;
				}
			}
			else if (!strcmp(*arg, "--help")) {
				fprintf(stdout, "`psl' explores the Public Suffix List\n\n");
				usage(0, stdout);
			}
			else if (!strcmp(*arg, "--version")) {
				printf("psl %s\n", PACKAGE_VERSION);
				printf("libpsl %s\n", psl_get_version());
				printf("\n");
				printf("Copyright (C) 2014-2015 Tim Ruehsen\n");
				printf("License: MIT\n");
				exit(0);
			}
			else if (!strcmp(*arg, "--")) {
				arg++;
				break;
			}
			else {
				fprintf(stderr, "Unknown option '%s'\n", *arg);
				usage(1, stderr);
			}
		} else
			break;
	}

	if (mode != 99) {
		if (!psl) {
			fprintf(stderr, "No PSL data available - aborting\n");
			exit(2);
		}
		if (arg >= argv + argc) {
			char buf[256], *domain, *lower;
			size_t len;
			psl_error_t rc;

			/* read URLs from STDIN */
			while (fgets(buf, sizeof(buf), stdin)) {
				/* <ctype.h> functions require an unsigned char value; input may be UTF-8 */
				for (domain = buf; isspace((unsigned char) *domain); domain++); /* skip leading spaces */
				if (*domain == '#' || !*domain) continue; /* skip empty lines and comments */
				for (len = strlen(domain); len && isspace((unsigned char) domain[len - 1]); len--); /* skip trailing spaces */
				domain[len] = 0;

				/* make sure free(lower) below is safe even if the conversion
				 * fails without storing a result */
				lower = NULL;

				if ((rc = psl_str_to_utf8lower(domain, NULL, NULL, &lower)) != PSL_SUCCESS)
					fprintf(stderr, "%s: Failed to convert to lowercase UTF-8 (%d)\n", domain, rc);
				else if (mode == 1)
					printf("%s: %d (%s)\n", domain, psl_is_public_suffix(psl, lower), lower);
				else if (mode == 2)
					printf("%s: %s\n", domain, psl_unregistrable_domain(psl, lower));
				else if (mode == 3)
					printf("%s: %s\n", domain, psl_registrable_domain(psl, lower));
				else if (mode == 4) {
					char *cookie_domain_lower = NULL;

					/* lowercase the cookie domain (not the host again) and
					 * actually use the converted value for the check */
					if ((rc = psl_str_to_utf8lower(cookie_domain, NULL, NULL, &cookie_domain_lower)) != PSL_SUCCESS)
						fprintf(stderr, "%s: Failed to convert cookie domain '%s' to lowercase UTF-8 (%d)\n", domain, cookie_domain, rc);
					else
						printf("%s: %d\n", domain, psl_is_cookie_domain_acceptable(psl, lower, cookie_domain_lower));

					free(cookie_domain_lower);
				}

				free(lower);
			}

			psl_free(psl);
			exit(0);
		}
	}

	if (mode == 1) {
		for (; arg < argv + argc; arg++)
			printf("%s: %d\n", *arg, psl_is_public_suffix(psl, *arg));
	}
	else if (mode == 2) {
		for (; arg < argv + argc; arg++)
			printf("%s: %s\n", *arg, psl_unregistrable_domain(psl, *arg));
	}
	else if (mode == 3) {
		for (; arg < argv + argc; arg++)
			printf("%s: %s\n", *arg, psl_registrable_domain(psl, *arg));
	}
	else if (mode == 4) {
		for (; arg < argv + argc; arg++)
			printf("%s: %d\n", *arg, psl_is_cookie_domain_acceptable(psl, *arg, cookie_domain));
	}
	else if (mode == 99) {
		/* print the statistics of a loaded file first, then those of the builtin data */
		if (psl && psl != psl_builtin()) {
			printf("suffixes: %d\n", psl_suffix_count(psl));
			printf("exceptions: %d\n", psl_suffix_exception_count(psl));
			printf("wildcards: %d\n", psl_suffix_wildcard_count(psl));
		}

		psl_free(psl);
		psl = (psl_ctx_t *) psl_builtin();

		if (psl) {
			printf("builtin suffixes: %d\n", psl_suffix_count(psl));
			printf("builtin exceptions: %d\n", psl_suffix_exception_count(psl));
			printf("builtin wildcards: %d\n", psl_suffix_wildcard_count(psl));
			printf("builtin filename: %s\n", psl_builtin_filename());
			/* the values are printed with %ld, so convert them to long explicitly */
			printf("builtin compile time: %ld (%s)\n", (long) psl_builtin_compile_time(), time2str(psl_builtin_compile_time()));
			printf("builtin file time: %ld (%s)\n", (long) psl_builtin_file_time(), time2str(psl_builtin_file_time()));
			printf("builtin SHA1 file hash: %s\n", psl_builtin_sha1sum());
		} else
			printf("No builtin PSL data available\n");
	}

	psl_free(psl);

	return 0;
}
Beispiel #6
0
/*
 * Checks psl_is_public_suffix() against a fixed table of domains using the
 * builtin PSL data, then checks that the builtin compile time, file time and
 * SHA1 sum are non-zero / non-empty.  Every check bumps either the global
 * 'ok' or the global 'failed' counter.
 */
static void test_psl(void)
{
	/* punycode generation: idn <string> */
	/* octal code generation: echo -n "<string>" | od -b */
	static const struct test_data {
		const char
			*domain;
		int
			result;
	} test_data[] = {
		{ "www.example.com", 0 },
		{ "com.ar", 1 },
		{ "www.com.ar", 0 },
		{ "cc.ar.us", 1 },
		{ ".cc.ar.us", 1 },
		{ "www.cc.ar.us", 0 },
		{ "www.ck", 0 }, /* exception from *.ck */
		{ "abc.www.ck", 0 },
		{ "xxx.ck", 1 },
		{ "www.xxx.ck", 0 },
		{ "\345\225\206\346\240\207", 1 }, /* xn--czr694b, here as UTF-8 (U+5546 U+6807) */
		{ "www.\345\225\206\346\240\207", 0 },
		{ "xn--czr694b", 1 },
		{ "www.xn--czr694b", 0 },
		/* some special test follow ('name' and 'forgot.his.name' are public, but e.g. his.name is not) */
		{ "name", 1 },
		{ ".name", 1 },
		{ "his.name", 0 },
		{ ".his.name", 0 },
		{ "forgot.his.name", 1 },
		{ ".forgot.his.name", 1 },
		{ "whoever.his.name", 0 },
		{ "whoever.forgot.his.name", 0 },
		{ ".", 1 }, /* special case */
		{ "", 1 },  /* special case */
		{ NULL, 1 },  /* special case */
		{ "adfhoweirh", 1 }, /* unknown TLD */
	};
	unsigned it;
	const psl_ctx_t *psl;

	psl = psl_builtin();

	printf("have %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl));

	for (it = 0; it < countof(test_data); it++) {
		const struct test_data *t = &test_data[it];
		int result = psl_is_public_suffix(psl, t->domain);

		if (result == t->result) {
			ok++;
		} else {
			failed++;
			/* the table contains a NULL domain; never hand NULL to %s */
			printf("psl_is_public_suffix(%s)=%d (expected %d)\n", t->domain ? t->domain : "(null)", result, t->result);
		}
	}

	/* the values are printed with %ld, so convert them to long explicitly */
	printf("psl_builtin_compile_time()=%ld\n", (long) psl_builtin_compile_time());
	psl_builtin_compile_time() == 0 ? failed++ : ok++;

	printf("psl_builtin_file_time()=%ld\n", (long) psl_builtin_file_time());
	psl_builtin_file_time() == 0 ? failed++ : ok++;

	printf("psl_builtin_sha1sum()=%s\n", psl_builtin_sha1sum());
	*psl_builtin_sha1sum() == 0 ? failed++ : ok++;
}
/*
 * Runs the registrable-domain checks against the builtin PSL data.
 *
 * First a set of fixed cases is passed to test() / test_iso(), including
 * labels of 254..257 bytes to reach the special code paths of
 * psl_str_to_utf8lower().  Then PSL_TESTFILE is read line by line; blank
 * lines and '//' comments are skipped, and each remaining line must be
 * either a "checkPublicSuffix(<domain>, <expected>);" statement (each
 * argument a quoted string or the bare word null) or a plain
 * "<domain> <expected>" pair, where the word "null" again means NULL.
 * Anything else is reported as malformed and counted in 'failed'.
 */
static void test_psl(void)
{
	const psl_ctx_t *ctx = psl_builtin();
	FILE *in;
	const char *cur;
	char line[256], name[128], want[128], semi[2];
	char label[258];
	int null_domain, null_regdom;
	unsigned n;

	printf("have %d suffixes and %d exceptions\n", psl_suffix_count(ctx), psl_suffix_exception_count(ctx));

	/* no context, no domain, nothing expected */
	test(NULL, NULL, NULL);

	/* no context, ordinary host name */
	test(NULL, "www.example.com", NULL);

	/* no context, bare TLD */
	test(NULL, "com", NULL);

#ifdef WITH_LIBICU
	/* Norwegian, uppercase o-slash in the input, lowercase expected */
	test(ctx, "www.\303\230yer.no", "www.\303\270yer.no");
#endif

	/* Norwegian, lowercase o-slash */
	test(ctx, "www.\303\270yer.no", "www.\303\270yer.no");

	/* Norwegian, lowercase o-slash given as a single byte (ISO-8859-15) */
	test_iso(ctx, "www.\370yer.no", "www.\303\270yer.no");

	/* long single labels around the 256 byte mark, for psl_str_to_utf8lower() */
	n = 254;
	while (n <= 257) {
		memset(label, 'a', n);
		label[n] = 0;

		/* single-byte o-slash in front */
		label[0] = '\370';
		test_iso(ctx, label, NULL);

		/* the same character as a two-byte UTF-8 sequence */
		label[0] = '\303';
		label[1] = '\270';
		test(ctx, label, NULL);

		n++;
	}

	/* the *.name cases with the builtin context */
	test(ctx, "whoever.forgot.his.name", "whoever.forgot.his.name");
	test(ctx, "forgot.his.name", NULL);
	test(ctx, "his.name", "his.name");

	in = fopen(PSL_TESTFILE, "r");
	if (in == NULL) {
		printf("Failed to open %s\n", PSL_TESTFILE);
		failed++;
		return;
	}

	while (fgets(line, sizeof(line), in) != NULL) {
		/* step over leading ASCII white space */
		cur = line;
		while (*cur == ' ' || *cur == '\t' || *cur == '\r' || *cur == '\n')
			cur++;

		/* nothing to do for blank lines and comments */
		if (*cur == '\0' || (cur[0] == '/' && cur[1] == '/'))
			continue;

		null_domain = 0;
		null_regdom = 0;

		if (sscanf(cur, "checkPublicSuffix ( '%127[^']' , '%127[^']' ) %1[;]", name, want, semi) == 3) {
			/* both arguments are quoted strings, nothing to flag */
		} else if (sscanf(cur, "checkPublicSuffix ( '%127[^']' , null ) %1[;]", name, semi) == 2) {
			null_regdom = 1;
		} else if (sscanf(cur, "checkPublicSuffix ( null , '%127[^']' ) %1[;]", want, semi) == 2) {
			null_domain = 1;
		} else if (sscanf(cur, "checkPublicSuffix ( null , null ) %1[;]", semi) == 1) {
			null_domain = 1;
			null_regdom = 1;
		} else if (sscanf(cur, "%127s %127s", name, want) == 2) {
			/* plain two-column format */
			null_domain = strcmp(name, "null") == 0;
			null_regdom = strcmp(want, "null") == 0;
		} else {
			failed++;
			printf("Malformed line from '" PSL_TESTFILE "': %s", line);
			continue;
		}

		test(ctx, null_domain ? NULL : name, null_regdom ? NULL : want);
	}

	fclose(in);
}
Beispiel #8
0
/*
 * Checks psl_is_public_suffix() against a fixed table of domains using the
 * PSL data loaded from PSL_FILE, verifies psl_check_version_number(), and
 * then calls a number of API functions (partly with NULL arguments) purely
 * to exercise more code paths; the results of those calls are not checked.
 * Table and version checks bump the global 'ok' / 'failed' counters.
 */
static void test_psl(void)
{
	/* punycode generation: idn <string> */
	/* octal code generation: echo -n "<string>" | od -b */
	static const struct test_data {
		const char
			*domain;
		int
			result;
	} test_data[] = {
		{ "www.example.com", 0 },
		{ "com.ar", 1 },
		{ "www.com.ar", 0 },
		{ "cc.ar.us", 1 },
		{ ".cc.ar.us", 1 },
		{ "www.cc.ar.us", 0 },
		{ "www.ck", 0 }, /* exception from *.ck */
		{ "abc.www.ck", 0 },
		{ "xxx.ck", 1 },
		{ "www.xxx.ck", 0 },
		{ "\345\225\206\346\240\207", 1 }, /* xn--czr694b, here as UTF-8 (U+5546 U+6807) */
		{ "www.\345\225\206\346\240\207", 0 },
		/* some special test follow ('name' and 'forgot.his.name' are public, but e.g. his.name is not) */
		{ "name", 1 },
		{ ".name", 1 },
		{ "his.name", 0 },
		{ ".his.name", 0 },
		{ "forgot.his.name", 1 },
		{ ".forgot.his.name", 1 },
		{ "whoever.his.name", 0 },
		{ "whoever.forgot.his.name", 0 },
		{ ".", 1 }, /* special case */
		{ "", 1 },  /* special case */
		{ NULL, 1 },  /* special case */
		{ "adfhoweirh", 1 }, /* unknown TLD */
	};
	unsigned it;
	int result, ver;
	psl_ctx_t *psl;

	psl = psl_load_file(PSL_FILE);

	printf("loaded %d suffixes and %d exceptions\n", psl_suffix_count(psl), psl_suffix_exception_count(psl));

	for (it = 0; it < countof(test_data); it++) {
		const struct test_data *t = &test_data[it];
		result = psl_is_public_suffix(psl, t->domain);

		if (result == t->result) {
			ok++;
		} else {
			failed++;
			/* the table contains a NULL domain; never hand NULL to %s */
			printf("psl_is_public_suffix(%s)=%d (expected %d)\n", t->domain ? t->domain : "(null)", result, t->result);
		}
	}

	/* do some checks to cover more code paths in libpsl */
	psl_is_public_suffix(NULL, "xxx");

	if ((ver = psl_check_version_number(0)) == 0) {
		printf("psl_check_version_number(0) is 0\n");
		failed++;
	} else {
		if (((result = psl_check_version_number(ver)) != ver)) {
			printf("psl_check_version_number(%06X) is %06X\n", ver, result);
			failed++;
		}

		if (((result = psl_check_version_number(ver - 1)) != 0)) {
			printf("psl_check_version_number(%06X) is %06X\n", ver - 1, result);
			failed++;
		}

		if (((result = psl_check_version_number(ver + 1)) != ver)) {
			/* report the argument that was actually passed (ver + 1) */
			printf("psl_check_version_number(%06X) is %06X\n", ver + 1, result);
			failed++;
		}
	}

	psl_str_to_utf8lower("www.example.com", "utf-8", "en", NULL);
	psl_str_to_utf8lower(NULL, "utf-8", "en", NULL);

	{
		/* reset to NULL after each free so a call that stores nothing stays safe */
		char *lower = NULL;

		psl_str_to_utf8lower("www.example.com", NULL, "de", &lower);
		free(lower); lower = NULL;

		psl_str_to_utf8lower("\374bel.de", NULL, "de", &lower);
		free(lower); lower = NULL;

		psl_str_to_utf8lower("\374bel.de", "iso-8859-1", NULL, &lower);
		free(lower); lower = NULL;

		psl_str_to_utf8lower(NULL, "utf-8", "en", &lower);
		free(lower); lower = NULL;
	}

	/* the remaining calls are made for coverage only; their results are ignored */
	psl_get_version();
	psl_dist_filename();
	psl_builtin_filename();
	psl_builtin_outdated();
	psl_builtin_file_time();
	psl_builtin_sha1sum();
	psl_suffix_wildcard_count(NULL);
	psl_suffix_wildcard_count(psl);
	psl_suffix_wildcard_count(psl_builtin());
	psl_suffix_count(NULL);
	psl_suffix_exception_count(NULL);
	psl_load_file(NULL);
	psl_load_fp(NULL);
	psl_registrable_domain(NULL, "");
	psl_registrable_domain(psl, NULL);
	psl_registrable_domain(psl, "www.example.com");
	psl_unregistrable_domain(NULL, "");
	psl_unregistrable_domain(psl, NULL);
	psl_is_public_suffix2(NULL, "", PSL_TYPE_ANY);
	psl_is_public_suffix2(psl, NULL, PSL_TYPE_ANY);

	psl_free(psl);
}