Exemplo n.º 1
0
Arquivo: test.c Projeto: BIllli/mget
static void test_iri_compare(void)
{
	static const struct iri_test_data {
		const char
			*url1,
			*url2;
		int
			result;
	} test_data[] = {
		{ "http://abc.com", "http://abc.com/", -1}, // different, some web servers redirect ... to .../ due to normalization issues
		{ "http://abc.com", "http://abc.com:", 0},
		{ "http://abc.com", "http://abc.com:/", -1},
		{ "http://abc.com", "http://abc.com:80/", -1},
		{ "http://abc.com", "http://abc.com:80//", -1},
		// { "http://äöü.com", "http://ÄÖÜ.com:80//", 0},
		{ "http://abc.com:80/~smith/home.html", "http://abc.com/~smith/home.html", 0},
		{ "http://abc.com:80/~smith/home.html", "http://ABC.com/~smith/home.html", 0},
		{ "http://abc.com:80/~smith/home.html", "http://ABC.com/%7Esmith/home.html", 0},
		{ "http://abc.com:80/~smith/home.html", "http://ABC.com/%7esmith/home.html", 0},
		{ "http://ABC.com/%7esmith/home.html", "http://ABC.com/%7Esmith/home.html", 0},
		{ "http://ABC.com/%7esmith/home.html", "http://ACB.com/%7Esmith/home.html", -1}
	};
	unsigned it;
	int n;

	for (it = 0; it < countof(test_data); it++) {
		const struct iri_test_data *t = &test_data[it];
		mget_iri_t *iri1 = mget_iri_parse(t->url1, "utf-8");
		mget_iri_t *iri2 = mget_iri_parse(t->url2, "utf-8");

		n = mget_iri_compare(iri1, iri2);
		if (n < -1) n = -1;
		else if (n > 1) n = 1;

		if (n == t->result)
			ok++;
		else {
			failed++;
			info_printf("Failed [%u]: compare(%s,%s) -> %d (expected %d)\n", it, t->url1, t->url2, n, t->result);
			printf("  display %s / %s\n", iri1->display, iri2->display);
			printf("  scheme %s / %s\n", iri1->scheme, iri2->scheme);
			printf("  user %s / %s\n", iri1->userinfo, iri2->userinfo);
			printf("  host %s / %s\n", iri1->host, iri2->host);
			printf("  port %s / %s\n", iri1->port, iri2->port);
			printf("  path %s / %s\n", iri1->path, iri2->path);
			printf("  query %s / %s\n", iri1->query, iri2->query);
			printf("  fragment %s / %s\n", iri1->fragment, iri2->fragment);
			printf("\n");
		}

		mget_iri_free(&iri2);
		mget_iri_free(&iri1);
	}
}
Exemplo n.º 2
0
Arquivo: job.c Projeto: darnir/mget
void queue_del(JOB *job)
{
	if (job) {
		debug_printf("queue_del %p\n", (void *)job);

		// special handling for automatic robots.txt jobs
		if (job->deferred) {
			JOB new_job = { .iri = NULL };

			if (job->host)
				job->host->robot_job = NULL;
			mget_iri_free(&job->iri);

			// create a job for each deferred IRI
			for (int it = 0; it < mget_vector_size(job->deferred); it++) {
				new_job.iri = mget_vector_get(job->deferred, it);
				new_job.local_filename = get_local_filename(new_job.iri);
				queue_add_job(&new_job);
			}
		}

		job_free(job);

		mget_thread_mutex_lock(&mutex);
		mget_list_remove(&queue, job);
		mget_thread_mutex_unlock(&mutex);
	}
Exemplo n.º 3
0
mget_iri_t *blacklist_add(mget_iri_t *iri)
{
	if (!iri)
		return NULL;

	if (mget_iri_supported(iri)) {
		mget_thread_mutex_lock(&mutex);

		if (!blacklist) {
			blacklist = mget_hashmap_create(128, -2, (unsigned int(*)(const void *))hash_iri, (int(*)(const void *, const void *))mget_iri_compare);
			mget_hashmap_set_destructor(blacklist, (void(*)(void *, void *))_free_entry);
		}

		if (!mget_hashmap_contains(blacklist, iri)) {
			// info_printf("Add to blacklist: %s\n",iri->uri);
			mget_hashmap_put_noalloc(blacklist, iri, NULL); // use hashmap as a hashset (without value)
			mget_thread_mutex_unlock(&mutex);
			return iri;
		}

		mget_thread_mutex_unlock(&mutex);
	}

	mget_iri_free(&iri);

	return NULL;
}
Exemplo n.º 4
0
Arquivo: test.c Projeto: BIllli/mget
static void test_cookies(void)
{
	static const struct test_data {
		const char
			*uri,
			*set_cookie,
			*name, *value, *domain, *path, *expires;
		unsigned int
			domain_dot : 1, // for compatibility with Netscape cookie format
			normalized : 1,
			persistent : 1,
			host_only : 1,
			secure_only : 1, // cookie should be used over secure connections only (TLS/HTTPS)
			http_only : 1; // just use the cookie via HTTP/HTTPS protocol
		int
			result,
			psl_result;
	} test_data[] = {
		{	// allowed cookie
			"www.example.com",
			"ID=65=abcd; expires=Tuesday, 07-May-2013 07:48:53 GMT; path=/; domain=.example.com; HttpOnly",
			"ID", "65=abcd", "example.com", "/", "Tue, 07 May 2013 07:48:53 GMT",
			1, 1, 1, 0, 0, 1,
			0, 0
		},
		{	// allowed cookie ANSI C's asctime format
			"www.example.com",
			"ID=65=abcd; expires=Tue May 07 07:48:53 2013; path=/; domain=.example.com",
			"ID", "65=abcd", "example.com", "/", "Tue, 07 May 2013 07:48:53 GMT",
			1, 1, 1, 0, 0, 0,
			0, 0
		},
		{	// allowed cookie without path
			"www.example.com",
			"ID=65=abcd; expires=Tue, 07-May-2013 07:48:53 GMT; domain=.example.com",
			"ID", "65=abcd", "example.com", "/", "Tue, 07 May 2013 07:48:53 GMT",
			1, 1, 1, 0, 0, 0,
			0, 0
		},
		{	// allowed cookie without domain
			"www.example.com",
			"ID=65=abcd; expires=Tue, 07-May-2013 07:48:53 GMT; path=/",
			"ID", "65=abcd", "www.example.com", "/", "Tue, 07 May 2013 07:48:53 GMT",
			0, 1, 1, 1, 0, 0,
			0, 0
		},
		{	// allowed cookie without domain, path and expires
			"www.example.com",
			"ID=65=abcd",
			"ID", "65=abcd", "www.example.com", "/", "Tue, 07 May 2013 07:48:53 GMT",
			0, 1, 0, 1, 0, 0,
			0, 0
		},
		{	// illegal cookie
			"www.example.com",
			"ID=65=abcd; expires=Tue, 07-May-2013 07:48:53 GMT; path=/; domain=.example.org",
			"ID", "65=abcd", "example.org", "/", "Tue, 07 May 2013 07:48:53 GMT",
			1, 0, 1, 0, 0, 0,
			-1, 0
		},
#ifdef WITH_LIBPSL
		{	// supercookie, accepted by normalization (rule 'com') but not by mget_cookie_check_psl())
			"www.example.com",
			"ID=65=abcd; expires=Mon, 29-Feb-2016 07:48:54 GMT; path=/; domain=.com; HttpOnly; Secure",
			"ID", "65=abcd", "com", "/", "Mon, 29 Feb 2016 07:48:54 GMT",
			1, 0, 1, 0, 1, 1,
			0, -1
		},
		{	// supercookie, accepted by normalization  (rule '*.ar') but not by mget_cookie_check_psl())
			"www.sa.gov.au",
			"ID=65=abcd; expires=Tue, 29-Feb-2000 07:48:55 GMT; path=/; domain=.sa.gov.au",
			"ID", "65=abcd", "sa.gov.au", "/", "Tue, 29 Feb 2000 07:48:55 GMT",
			1, 0, 1, 0, 0, 0,
			0, -1
		},
#endif
		{	// exception rule '!educ.ar', accepted by normalization
			"www.educ.ar",
			"ID=65=abcd; path=/; domain=.educ.ar",
			"ID", "65=abcd", "educ.ar", "/", NULL,
			1, 1, 0, 0, 0, 0,
			0, 0
		},
	};
	mget_cookie_t cookie;
	mget_cookie_db_t *cookies;
	mget_iri_t *iri;
	unsigned it;
	int result, result_psl;

	cookies = mget_cookie_db_init(NULL);
	mget_cookie_db_load_psl(cookies, DATADIR "/effective_tld_names.dat");

	for (it = 0; it < countof(test_data); it++) {
		const struct test_data *t = &test_data[it];
		char thedate[32], *header;

		iri = mget_iri_parse(t->uri, "utf-8");
		mget_http_parse_setcookie(t->set_cookie, &cookie);
		if ((result = mget_cookie_normalize(iri, &cookie)) != t->result) {
			failed++;
			info_printf("Failed [%u]: normalize_cookie(%s) -> %d (expected %d)\n", it, t->set_cookie, result, t->result);
			mget_cookie_deinit(&cookie);
			goto next;
		} else {
			if ((result_psl = mget_cookie_check_psl(cookies, &cookie)) != t->psl_result) {
				failed++;
				info_printf("Failed [%u]: PSL check(%s) -> %d (expected %d)\n", it, t->set_cookie, result_psl, t->psl_result);
			}
			mget_cookie_deinit(&cookie);
			goto next;
		}

		if (cookie.expires) {
			mget_http_print_date(cookie.expires, thedate, sizeof(thedate));
			if (strcmp(thedate, t->expires)) {
				failed++;
				info_printf("Failed [%u]: expires mismatch: '%s' != '%s' (time_t %lld)\n", it, thedate, t->expires, (long long)cookie.expires);
				mget_cookie_deinit(&cookie);
				goto next;
			}
		}

		if (strcmp(cookie.name, t->name) ||
			strcmp(cookie.value, t->value) ||
			strcmp(cookie.domain, t->domain) ||
			strcmp(cookie.path, t->path) ||
			cookie.domain_dot != t->domain_dot ||
			cookie.normalized != t->normalized ||
			cookie.persistent != t->persistent ||
			cookie.host_only != t->host_only ||
			cookie.secure_only != t->secure_only ||
			cookie.http_only != t->http_only)
		{
			failed++;

			info_printf("Failed [%u]: cookie (%s) differs:\n", it, t->set_cookie);
			if (strcmp(cookie.name, t->name))
				info_printf("  name %s (expected %s)\n", cookie.name, t->name);
			if (strcmp(cookie.value, t->value))
				info_printf("  value %s (expected %s)\n", cookie.value, t->value);
			if (strcmp(cookie.domain, t->domain))
				info_printf("  domain %s (expected %s)\n", cookie.domain, t->domain);
			if (strcmp(cookie.path, t->path))
				info_printf("  path %s (expected %s)\n", cookie.path, t->path);
			if (cookie.domain_dot != t->domain_dot)
				info_printf("  domain_dot %d (expected %d)\n", cookie.domain_dot, t->domain_dot);
			if (cookie.normalized != t->normalized)
				info_printf("  normalized %d (expected %d)\n", cookie.normalized, t->normalized);
			if (cookie.persistent != t->persistent)
				info_printf("  persistent %d (expected %d)\n", cookie.persistent, t->persistent);
			if (cookie.host_only != t->host_only)
				info_printf("  host_only %d (expected %d)\n", cookie.host_only, t->host_only);
			if (cookie.secure_only != t->secure_only)
				info_printf("  secure_only %d (expected %d)\n", cookie.secure_only, t->secure_only);
			if (cookie.http_only != t->http_only)
				info_printf("  http_only %d (expected %d)\n", cookie.http_only, t->http_only);

			mget_cookie_deinit(&cookie);
			goto next;
		}

		mget_cookie_store_cookie(cookies, &cookie);

		info_printf("%s\n", header = mget_cookie_create_request_header(cookies, iri));
		xfree(header);

		ok++;

next:
		mget_iri_free(&iri);
	}

	mget_cookie_db_free(&cookies);
}
Exemplo n.º 5
0
Arquivo: test.c Projeto: BIllli/mget
static void test_iri_relative_to_absolute(void)
{
	static const struct iri_test_data {
		const char
			*base,
			*relative,
			*result;
	} test_data[] = {
#define H1 "http://x.tld"
		{ H1, "", H1"/" },
		{ H1, ".", H1"/" },
		{ H1, "./", H1"/" },
		{ H1, "..", H1"/" },
		{ H1, "../", H1"/" },
		{ H1, "foo", H1"/foo" },
		{ H1, "foo/bar", H1"/foo/bar" },
		{ H1, "foo///bar", H1"/foo/bar" },
		{ H1, "foo/.", H1"/foo/" },
		{ H1, "foo/./", H1"/foo/" },
		{ H1, "foo./", H1"/foo./" },
		{ H1, "foo/../bar", H1"/bar" },
		{ H1, "foo/../bar/", H1"/bar/" },
		{ H1, "foo/bar/..", H1"/foo/" },
		{ H1, "foo/bar/../x", H1"/foo/x" },
		{ H1, "foo/bar/../x/", H1"/foo/x/" },
		{ H1, "foo/..", H1"/" },
		{ H1, "foo/../..", H1"/" },
		{ H1, "foo/../../..", H1"/" },
		{ H1, "foo/../../bar/../../baz", H1"/baz" },
		{ H1, "a/b/../../c", H1"/c" },
		{ H1, "./a/../b", H1"/b" },
		{ H1, "/", H1"/" },
		{ H1, "/.", H1"/" },
		{ H1, "/./", H1"/" },
		{ H1, "/..", H1"/" },
		{ H1, "/../", H1"/" },
		{ H1, "/foo", H1"/foo" },
		{ H1, "/foo/bar", H1"/foo/bar" },
		{ H1, "/foo///bar", H1"/foo/bar" },
		{ H1, "/foo/.", H1"/foo/" },
		{ H1, "/foo/./", H1"/foo/" },
		{ H1, "/foo./", H1"/foo./" },
		{ H1, "/foo/../bar", H1"/bar" },
		{ H1, "/foo/../bar/", H1"/bar/" },
		{ H1, "/foo/bar/..", H1"/foo/" },
		{ H1, "/foo/bar/../x", H1"/foo/x" },
		{ H1, "/foo/bar/../x/", H1"/foo/x/" },
		{ H1, "/foo/..", H1"/" },
		{ H1, "/foo/../..", H1"/" },
		{ H1, "/foo/../../..", H1"/" },
		{ H1, "/foo/../../bar/../../baz", H1"/baz" },
		{ H1, "/a/b/../../c", H1"/c" },
		{ H1, "/./a/../b", H1"/b" },
		{ H1, ".x", H1"/.x" },
		{ H1, "..x", H1"/..x" },
		{ H1, "foo/.x", H1"/foo/.x" },
		{ H1, "foo/bar/.x", H1"/foo/bar/.x" },
		{ H1, "foo/..x", H1"/foo/..x" },
		{ H1, "foo/bar/..x", H1"/foo/bar/..x" },
		{ H1, "/x.php?y=ftp://example.com/&z=1_2", H1"/x.php?y=ftp://example.com/&z=1_2" },
		{ H1, "//x.y.com/", "http://x.y.com/" },
		{ H1, "http://x.y.com/", "http://x.y.com/" },
//		{ H1, "site;sub:.html", H1"/site;sub:.html" },
#undef H1
#define H1 "http://x.tld/"
		{ H1, "", H1"" },
		{ H1, ".", H1"" },
		{ H1, "./", H1"" },
		{ H1, "..", H1"" },
		{ H1, "../", H1"" },
		{ H1, "foo", H1"foo" },
		{ H1, "foo/bar", H1"foo/bar" },
		{ H1, "foo///bar", H1"foo/bar" },
		{ H1, "foo/.", H1"foo/" },
		{ H1, "foo/./", H1"foo/" },
		{ H1, "foo./", H1"foo./" },
		{ H1, "foo/../bar", H1"bar" },
		{ H1, "foo/../bar/", H1"bar/" },
		{ H1, "foo/bar/..", H1"foo/" },
		{ H1, "foo/bar/../x", H1"foo/x" },
		{ H1, "foo/bar/../x/", H1"foo/x/" },
		{ H1, "foo/..", H1"" },
		{ H1, "foo/../..", H1"" },
		{ H1, "foo/../../..", H1"" },
		{ H1, "foo/../../bar/../../baz", H1"baz" },
		{ H1, "a/b/../../c", H1"c" },
		{ H1, "./a/../b", H1"b" },
		{ H1, "/", H1"" },
		{ H1, "/.", H1"" },
		{ H1, "/./", H1"" },
		{ H1, "/..", H1"" },
		{ H1, "/../", H1"" },
		{ H1, "/foo", H1"foo" },
		{ H1, "/foo/bar", H1"foo/bar" },
		{ H1, "/foo///bar", H1"foo/bar" },
		{ H1, "/foo/.", H1"foo/" },
		{ H1, "/foo/./", H1"foo/" },
		{ H1, "/foo./", H1"foo./" },
		{ H1, "/foo/../bar", H1"bar" },
		{ H1, "/foo/../bar/", H1"bar/" },
		{ H1, "/foo/bar/..", H1"foo/" },
		{ H1, "/foo/bar/../x", H1"foo/x" },
		{ H1, "/foo/bar/../x/", H1"foo/x/" },
		{ H1, "/foo/..", H1"" },
		{ H1, "/foo/../..", H1"" },
		{ H1, "/foo/../../..", H1"" },
		{ H1, "/foo/../../bar/../../baz", H1"baz" },
		{ H1, "/a/b/../../c", H1"c" },
		{ H1, "/./a/../b", H1"b" },
		{ H1, ".x", H1".x" },
		{ H1, "..x", H1"..x" },
		{ H1, "foo/.x", H1"foo/.x" },
		{ H1, "foo/bar/.x", H1"foo/bar/.x" },
		{ H1, "foo/..x", H1"foo/..x" },
		{ H1, "foo/bar/..x", H1"foo/bar/..x" },
		{ H1, "/x.php?y=ftp://example.com/&z=1_2", H1"x.php?y=ftp://example.com/&z=1_2" },
		{ H1, "//x.y.com/", "http://x.y.com/" },
		{ H1, "http://x.y.com/", "http://x.y.com/" },
#undef H1
#define H1 "http://x.tld/file"
#define R1 "http://x.tld/"
		{ H1, "", R1"" },
		{ H1, ".", R1"" },
		{ H1, "./", R1"" },
		{ H1, "..", R1"" },
		{ H1, "../", R1"" },
		{ H1, "foo", R1"foo" },
		{ H1, "foo/bar", R1"foo/bar" },
		{ H1, "foo///bar", R1"foo/bar" },
		{ H1, "foo/.", R1"foo/" },
		{ H1, "foo/./", R1"foo/" },
		{ H1, "foo./", R1"foo./" },
		{ H1, "foo/../bar", R1"bar" },
		{ H1, "foo/../bar/", R1"bar/" },
		{ H1, "foo/bar/..", R1"foo/" },
		{ H1, "foo/bar/../x", R1"foo/x" },
		{ H1, "foo/bar/../x/", R1"foo/x/" },
		{ H1, "foo/..", R1"" },
		{ H1, "foo/../..", R1"" },
		{ H1, "foo/../../..", R1"" },
		{ H1, "foo/../../bar/../../baz", R1"baz" },
		{ H1, "a/b/../../c", R1"c" },
		{ H1, "./a/../b", R1"b" },
		{ H1, "/", R1"" },
		{ H1, "/.", R1"" },
		{ H1, "/./", R1"" },
		{ H1, "/..", R1"" },
		{ H1, "/../", R1"" },
		{ H1, "/foo", R1"foo" },
		{ H1, "/foo/bar", R1"foo/bar" },
		{ H1, "/foo///bar", R1"foo/bar" },
		{ H1, "/foo/.", R1"foo/" },
		{ H1, "/foo/./", R1"foo/" },
		{ H1, "/foo./", R1"foo./" },
		{ H1, "/foo/../bar", R1"bar" },
		{ H1, "/foo/../bar/", R1"bar/" },
		{ H1, "/foo/bar/..", R1"foo/" },
		{ H1, "/foo/bar/../x", R1"foo/x" },
		{ H1, "/foo/bar/../x/", R1"foo/x/" },
		{ H1, "/foo/..", R1"" },
		{ H1, "/foo/../..", R1"" },
		{ H1, "/foo/../../..", R1"" },
		{ H1, "/foo/../../bar/../../baz", R1"baz" },
		{ H1, "/a/b/../../c", R1"c" },
		{ H1, "/./a/../b", R1"b" },
		{ H1, ".x", R1".x" },
		{ H1, "..x", R1"..x" },
		{ H1, "foo/.x", R1"foo/.x" },
		{ H1, "foo/bar/.x", R1"foo/bar/.x" },
		{ H1, "foo/..x", R1"foo/..x" },
		{ H1, "foo/bar/..x", R1"foo/bar/..x" },
		{ H1, "/x.php?y=ftp://example.com/&z=1_2", R1"x.php?y=ftp://example.com/&z=1_2" },
		{ H1, "//x.y.com/", "http://x.y.com/" },
		{ H1, "http://x.y.com/", "http://x.y.com/" },
#undef H1
#undef R1
#define H1 "http://x.tld/dir/"
#define R1 "http://x.tld/"
		{ H1, "", H1"" },
		{ H1, ".", H1"" },
		{ H1, "./", H1"" },
		{ H1, "..", R1"" },
		{ H1, "../", R1"" },
		{ H1, "foo", H1"foo" },
		{ H1, "foo/bar", H1"foo/bar" },
		{ H1, "foo///bar", H1"foo/bar" },
		{ H1, "foo/.", H1"foo/" },
		{ H1, "foo/./", H1"foo/" },
		{ H1, "foo./", H1"foo./" },
		{ H1, "foo/../bar", H1"bar" },
		{ H1, "foo/../bar/", H1"bar/" },
		{ H1, "foo/bar/..", H1"foo/" },
		{ H1, "foo/bar/../x", H1"foo/x" },
		{ H1, "foo/bar/../x/", H1"foo/x/" },
		{ H1, "foo/..", H1"" },
		{ H1, "foo/../..", R1"" },
		{ H1, "foo/../../..", R1"" },
		{ H1, "foo/../../bar/../../baz", R1"baz" },
		{ H1, "a/b/../../c", H1"c" },
		{ H1, "./a/../b", H1"b" },
		{ H1, "/", R1"" },
		{ H1, "/.", R1"" },
		{ H1, "/./", R1"" },
		{ H1, "/..", R1"" },
		{ H1, "/../", R1"" },
		{ H1, "/foo", R1"foo" },
		{ H1, "/foo/bar", R1"foo/bar" },
		{ H1, "/foo///bar", R1"foo/bar" },
		{ H1, "/foo/.", R1"foo/" },
		{ H1, "/foo/./", R1"foo/" },
		{ H1, "/foo./", R1"foo./" },
		{ H1, "/foo/../bar", R1"bar" },
		{ H1, "/foo/../bar/", R1"bar/" },
		{ H1, "/foo/bar/..", R1"foo/" },
		{ H1, "/foo/bar/../x", R1"foo/x" },
		{ H1, "/foo/bar/../x/", R1"foo/x/" },
		{ H1, "/foo/..", R1"" },
		{ H1, "/foo/../..", R1"" },
		{ H1, "/foo/../../..", R1"" },
		{ H1, "/foo/../../bar/../../baz", R1"baz" },
		{ H1, "/a/b/../../c", R1"c" },
		{ H1, "/./a/../b", R1"b" },
		{ H1, ".x", H1".x" },
		{ H1, "..x", H1"..x" },
		{ H1, "foo/.x", H1"foo/.x" },
		{ H1, "foo/bar/.x", H1"foo/bar/.x" },
		{ H1, "foo/..x", H1"foo/..x" },
		{ H1, "foo/bar/..x", H1"foo/bar/..x" },
		{ H1, "/x.php?y=ftp://example.com/&z=1_2", R1"x.php?y=ftp://example.com/&z=1_2" },
		{ H1, "//x.y.com/", "http://x.y.com/" },
		{ H1, "http://x.y.com/", "http://x.y.com/" }
#undef H1
#undef R1
	};
	unsigned it;
	char uri_buf_static[32]; // use a size that forces allocation in some cases
	mget_buffer_t *uri_buf = 	mget_buffer_init(NULL, uri_buf_static, sizeof(uri_buf_static));
	mget_iri_t *base;

	for (it = 0; it < countof(test_data); it++) {
		const struct iri_test_data *t = &test_data[it];

		base = mget_iri_parse(t->base, "utf-8");
		mget_iri_relative_to_abs(base, t->relative, strlen(t->relative), uri_buf);

		if (!strcmp(uri_buf->data, t->result))
			ok++;
		else {
			failed++;
			info_printf("Failed [%u]: %s+%s -> %s (expected %s)\n", it, t->base, t->relative, uri_buf->data, t->result);
		}

		mget_iri_free(&base);
	}

	mget_buffer_free(&uri_buf);
}
Exemplo n.º 6
0
Arquivo: test.c Projeto: BIllli/mget
static void test_iri_parse(void)
{
	const struct iri_test_data {
		const char
			*uri,
			*display,
			*scheme,
			*userinfo,
			*password,
			*host,
			*port,
			*path,
			*query,
			*fragment;
	} test_data[] = {
		{ "1.2.3.4", NULL, MGET_IRI_SCHEME_HTTP, NULL, NULL, "1.2.3.4", NULL, NULL, NULL, NULL},
		{ "1.2.3.4:987", NULL, MGET_IRI_SCHEME_HTTP, NULL, NULL, "1.2.3.4", "987", NULL, NULL, NULL},
		{ "//example.com/thepath", NULL, MGET_IRI_SCHEME_HTTP, NULL, NULL, "example.com", NULL, "thepath", NULL, NULL},
		// { "///thepath", NULL, MGET_IRI_SCHEME_HTTP, NULL, NULL, NULL, NULL, "thepath", NULL, NULL},
		{ "example.com", NULL, MGET_IRI_SCHEME_HTTP, NULL, NULL, "example.com", NULL, NULL, NULL, NULL},
		{ "example.com:555", NULL, MGET_IRI_SCHEME_HTTP, NULL, NULL, "example.com", "555", NULL, NULL, NULL},
		{ "http://example.com", NULL, MGET_IRI_SCHEME_HTTP, NULL, NULL, "example.com", NULL, NULL, NULL, NULL},
		{ "http://example.com:", NULL, MGET_IRI_SCHEME_HTTP, NULL, NULL, "example.com", NULL, NULL, NULL, NULL},
		{ "http://example.com:/", NULL, MGET_IRI_SCHEME_HTTP, NULL, NULL, "example.com", NULL, "", NULL, NULL},
		{ "http://example.com:80/", NULL, MGET_IRI_SCHEME_HTTP, NULL, NULL, "example.com", NULL, "", NULL, NULL},
		{ "https://example.com", NULL, MGET_IRI_SCHEME_HTTPS, NULL, NULL, "example.com", NULL, NULL, NULL, NULL},
		{ "https://example.com:443", NULL, MGET_IRI_SCHEME_HTTPS, NULL, NULL, "example.com", NULL, NULL, NULL, NULL},
		{ "https://example.com:444", NULL, MGET_IRI_SCHEME_HTTPS, NULL, NULL, "example.com", "444", NULL, NULL, NULL},
		{ "http://example.com:80", NULL, MGET_IRI_SCHEME_HTTP, NULL, NULL, "example.com", NULL, NULL, NULL, NULL},
		{ "http://example.com:81", NULL, MGET_IRI_SCHEME_HTTP, NULL, NULL, "example.com", "81", NULL, NULL, NULL},
		{ "http://example.com/index.html", NULL, MGET_IRI_SCHEME_HTTP, NULL, NULL, "example.com", NULL, "index.html", NULL, NULL},
		{ "http://example.com/index.html?query#frag", NULL, MGET_IRI_SCHEME_HTTP, NULL, NULL, "example.com", NULL, "index.html", "query", "frag"},
		{ "http://example.com/index.html?#", NULL, MGET_IRI_SCHEME_HTTP, NULL, NULL, "example.com", NULL, "index.html", "", ""},
		{ "碼標準萬國碼.com", NULL, MGET_IRI_SCHEME_HTTP, NULL, NULL, "xn--9cs565brid46mda086o.com", NULL, NULL, NULL, NULL},
		//		{ "ftp://cnn.example.com&[email protected]/top_story.htm", NULL,"ftp",NULL,NULL,"cnn.example.com",NULL,NULL,"[email protected]/top_story.htm",NULL }
		{ "ftp://[email protected]/top_story.htm", NULL, "ftp", NULL, NULL, "cnn.example.com", NULL, NULL, "[email protected]/top_story.htm", NULL},
//		{ "site;sub:.html", NULL, MGET_IRI_SCHEME_HTTP, NULL, NULL, "site", NULL, ";sub:.html", NULL, NULL},
	};
	unsigned it;

	for (it = 0; it < countof(test_data); it++) {
		const struct iri_test_data *t = &test_data[it];
		mget_iri_t *iri = mget_iri_parse(t->uri, "utf-8");

		if (mget_strcmp(iri->display, t->display)
			|| mget_strcmp(iri->scheme, t->scheme)
			|| mget_strcmp(iri->userinfo, t->userinfo)
			|| mget_strcmp(iri->password, t->password)
			|| mget_strcmp(iri->host, t->host)
			|| mget_strcmp(iri->port, t->port)
			|| mget_strcmp(iri->path, t->path)
			|| mget_strcmp(iri->query, t->query)
			|| mget_strcmp(iri->fragment, t->fragment))
		{
			failed++;
			printf("IRI test #%u failed:\n", it + 1);
			printf(" [%s]\n", iri->uri);
			printf("  display %s (expected %s)\n", iri->display, t->display);
			printf("  scheme %s (expected %s)\n", iri->scheme, t->scheme);
			printf("  user %s (expected %s)\n", iri->userinfo, t->userinfo);
			printf("  host %s (expected %s)\n", iri->host, t->host);
			printf("  port %s (expected %s)\n", iri->port, t->port);
			printf("  path %s (expected %s)\n", iri->path, t->path);
			printf("  query %s (expected %s)\n", iri->query, t->query);
			printf("  fragment %s (expected %s)\n", iri->fragment, t->fragment);
			printf("\n");
		} else {
			ok++;
		}

		mget_iri_free(&iri);
	}
}
Exemplo n.º 7
0
Arquivo: test.c Projeto: BIllli/mget
static void test_stringmap(void)
{
	mget_stringmap_t *m;
	char key[128], value[128], *val;
	int run, it;
	size_t valuesize;

	// the initial size of 16 forces the internal reshashing function to be called twice

	m = mget_stringmap_create(16);

	for (run = 0; run < 2; run++) {
		if (run) {
			mget_stringmap_clear(m);
			mget_stringmap_sethashfunc(m, hash_txt);
		}

		for (it = 0; it < 26; it++) {
			sprintf(key, "http://www.example.com/subdir/%d.html", it);
			valuesize = sprintf(value, "%d.html", it);
			if (mget_stringmap_put(m, key, value, valuesize + 1)) {
				failed++;
				info_printf("stringmap_put(%s) returns unexpected old value\n", key);
			} else ok++;
		}

		if ((it = mget_stringmap_size(m)) != 26) {
			failed++;
			info_printf("stringmap_size() returned %d (expected %d)\n", it, 26);
		} else ok++;

		// now, look up every single entry
		for (it = 0; it < 26; it++) {
			sprintf(key, "http://www.example.com/subdir/%d.html", it);
			sprintf(value, "%d.html", it);
			if (!(val = mget_stringmap_get(m, key))) {
				failed++;
				info_printf("stringmap_get(%s) didn't find entry\n", key);
			} else if (strcmp(val, value)) {
				failed++;
				info_printf("stringmap_get(%s) found '%s' (expected '%s')\n", key, val, value);
			} else ok++;
		}

		mget_stringmap_clear(m);

		if ((it = mget_stringmap_size(m)) != 0) {
			failed++;
			info_printf("stringmap_size() returned %d (expected 0)\n", it);
		} else ok++;

		for (it = 0; it < 26; it++) {
			sprintf(key, "http://www.example.com/subdir/%d.html", it);
			valuesize = sprintf(value, "%d.html", it);
			if (mget_stringmap_put(m, key, value, valuesize + 1)) {
				failed++;
				info_printf("stringmap_put(%s) returns unexpected old value\n", key);
			} else ok++;
		}

		if ((it = mget_stringmap_size(m)) != 26) {
			failed++;
			info_printf("stringmap_size() returned %d (expected %d)\n", it, 26);
		} else ok++;

		// now, remove every single entry
		for (it = 0; it < 26; it++) {
			sprintf(key, "http://www.example.com/subdir/%d.html", it);
			sprintf(value, "%d.html", it);
			mget_stringmap_remove(m, key);
		}

		if ((it = mget_stringmap_size(m)) != 0) {
			failed++;
			info_printf("stringmap_size() returned %d (expected 0)\n", it);
		} else ok++;

		for (it = 0; it < 26; it++) {
			sprintf(key, "http://www.example.com/subdir/%d.html", it);
			valuesize = sprintf(value, "%d.html", it);
			if (mget_stringmap_put(m, key, value, valuesize + 1)) {
				failed++;
				info_printf("stringmap_put(%s) returns unexpected old value\n", key);
			} else ok++;
		}

		if ((it = mget_stringmap_size(m)) != 26) {
			failed++;
			info_printf("stringmap_size() returned %d (expected %d)\n", it, 26);
		} else ok++;
	}

	// testing alloc/free in stringmap/hashmap
	mget_stringmap_clear(m);
	mget_stringmap_put(m, "thekey", NULL, 0) ? failed++ : ok++;
	mget_stringmap_put(m, "thekey", NULL, 0) ? ok++ : failed++;
	mget_stringmap_put(m, "thekey", "thevalue", 9) ? ok++ : failed++;
	mget_stringmap_put(m, "thekey", "thevalue", 9) ? ok++ : failed++;
	mget_stringmap_put(m, "thekey", NULL, 0) ? ok++ : failed++;

	// testing key/value identity alloc/free in stringmap/hashmap
	mget_stringmap_clear(m);
	mget_stringmap_put(m, "thekey", NULL, 0) ? failed++ : ok++;
	mget_stringmap_put(m, "thekey", NULL, 0) ? ok++ : failed++;
	mget_stringmap_put(m, "thekey", "thevalue", 9) ? ok++ : failed++;
	mget_stringmap_put(m, "thekey", NULL, 0) ? ok++ : failed++;

	mget_stringmap_free(&m);

	mget_http_challenge_t challenge;
	mget_http_parse_challenge("Basic realm=\"test realm\"", &challenge);
	mget_http_free_challenge(&challenge);

	mget_vector_t *challenges;
	challenges = mget_vector_create(2, 2, NULL);
	mget_vector_set_destructor(challenges, (void(*)(void *))mget_http_free_challenge);
	mget_http_parse_challenge("Basic realm=\"test realm\"", &challenge);
	mget_vector_add(challenges, &challenge, sizeof(challenge));
	mget_http_free_challenges(&challenges);

	char *response_text = strdup(
"HTTP/1.1 401 Authorization Required\r\n"\
"Date: Sun, 23 Dec 2012 21:03:45 GMT\r\n"\
"Server: Apache/2.2.22 (Debian)\r\n"\
"WWW-Authenticate: Digest realm=\"therealm\", nonce=\"Ip6MaovRBAA=c4af733c51270698260f5d357724c2cbce20fa3d\", algorithm=MD5, domain=\"/prot_digest_md5\", qop=\"auth\"\r\n"\
"Vary: Accept-Encoding\r\n"\
"Content-Length: 476\r\n"\
"Keep-Alive: timeout=5, max=99\r\n"\
"Connection: Keep-Alive\r\n"\
"Content-Type: text/html; charset=iso-8859-1\r\n\r\n");

	mget_iri_t *iri = mget_iri_parse("http://localhost/prot_digest_md5/", NULL);
	mget_http_request_t *req = mget_http_create_request(iri, "GET");
	mget_http_response_t *resp = mget_http_parse_response_header(response_text);
	mget_http_add_credentials(req, mget_vector_get(resp->challenges, 0), "tim", "123");
//	for (it=0;it<vec_size(req->lines);it++) {
//		info_printf("%s\n", (char *)vec_get(req->lines, it));
//	}
	mget_http_free_response(&resp);
	mget_http_free_request(&req);
	mget_iri_free(&iri);
	xfree(response_text);

// Authorization: Digest username="******", realm="therealm", nonce="Ip6MaovRBAA=c4af733c51270698260f5d357724c2cbce20fa3d", uri="/prot_digest_md5/", response="a99e2012d507a73dd46eb044d3f4641c", qop=auth, nc=00000001, cnonce="3d20faa1"

}
Exemplo n.º 8
0
Arquivo: iri.c Projeto: BIllli/mget
mget_iri_t *mget_iri_parse(const char *url, const char *encoding)
{
	mget_iri_t *iri;
	const char *default_port = NULL;
	char *p, *s, *authority, c;
	size_t slen, it;
	int url_allocated, maybe_scheme;

	if (!url)
		return NULL;

	/*
		URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
		hier-part   = "//" authority path-abempty / path-absolute / path-rootless / path-empty
		scheme      =  ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
	 */
	while (isspace(*url)) url++;
	if (!*url) return NULL;

	// first unescape, than convert to UTF-8
	if (strchr(url, '%')) {
		char *unesc_url = strdup(url);

		mget_percent_unescape(unesc_url);

		if (mget_str_needs_encoding(unesc_url)) {
			if ((url = mget_str_to_utf8(unesc_url, encoding)))
				xfree(unesc_url);
			else
				url = unesc_url; // on error, use what we have
		} else
			url = unesc_url;

		url_allocated = 1;
	} else {
		url_allocated = 0;

		if (mget_str_needs_encoding(url)) {
			if ((s = mget_str_to_utf8(url, encoding))) {
				url = s;
				url_allocated = 1;
			}
		}
	}

	// just use one block of memory for all parsed URI parts
	slen = strlen(url);
	iri = xmalloc(sizeof(mget_iri_t) + slen * 2 + 2);
	memset(iri, 0, sizeof(mget_iri_t));
	strcpy(((char *)iri) + sizeof(mget_iri_t), url);
	iri->uri = ((char *)iri) + sizeof(mget_iri_t);
	s = ((char *)iri) + sizeof(mget_iri_t) + slen + 1;
	strcpy(s, url);
	if (url_allocated)
		xfree(url);

	p = s;
	if (isalpha(*p)) {
		maybe_scheme = 1;
		while (*s && !_iri_isgendelim(*s)) {
			if (maybe_scheme && !_iri_isscheme(*s))
				maybe_scheme = 0;
			s++;
		}
	} else
		maybe_scheme = 0;

	if (maybe_scheme && (*s == ':' && (s[1] == '/' || s[1] == 0))) {
		// found a scheme
		*s++ = 0;

		// find the scheme in our static list of supported schemes
		// for later comparisons we compare pointers (avoiding strcasecmp())
		iri->scheme = p;
		for (it = 0; mget_iri_schemes[it]; it++) {
			if (!mget_strcasecmp_ascii(mget_iri_schemes[it], p)) {
				iri->scheme = mget_iri_schemes[it];
				default_port = iri_ports[it];
				break;
			}
		}

		if (iri->scheme == p) {
			// convert scheme to lowercase
			mget_strtolower((char *)iri->scheme);
		}

	} else {
		iri->scheme = MGET_IRI_SCHEME_DEFAULT;
		default_port = iri_ports[0]; // port 80
		s = p; // rewind
	}

	// this is true for http, https, ftp, file
	if (s[0] == '/' && s[1] == '/')
		s += 2;

	// authority
	authority = s;
	while (*s && *s != '/' && *s != '?' && *s != '#')
		s++;
	c = *s;
	if (c) *s++ = 0;

	// left over: [path][?query][#fragment]
	if (c == '/') {
		iri->path = s;
		while (*s && *s != '?' && *s != '#')
			s++;
		c = *s;
		if (c) *s++ = 0;
	}

	if (c == '?') {
		iri->query = s;
		while (*s && *s != '#')
			s++;
		c = *s;
		if (c) *s++ = 0;
	}

	if (c == '#') {
		iri->fragment = s;
		s += strlen(s);
	}

	if (*s) {
		debug_printf("unparsed rest '%s'\n", s);
	}

	if (*authority) {
		s = authority;
		p = strchr(authority, '@');
		if (p) {
			iri->userinfo = s;
			*p = 0;
			s = p + 1;
		}
		if (*s == '[') {
			p = strrchr(s, ']');
			if (p) {
				iri->host = s + 1;
				*p = 0;
				s = p + 1;
			} else {
				// something is broken
				iri->host = s + 1;
				s += strlen(s);
			}
		} else {
			iri->host = s;
			while (*s && *s != ':')
				s++;
		}
		if (*s == ':') {
			if (s[1]) {
				if (!default_port || (strcmp(s + 1, default_port) && atoi(s + 1) != atoi(default_port)))
					iri->port = s + 1;
			}
		}
		*s = 0;
 	}

	iri->resolv_port = iri->port ? iri->port : default_port;

	// now unescape all components (not interested in display, userinfo, password)
	if (iri->host) {
		mget_strtolower((char *)iri->host);
		if ((p = (char *)mget_str_to_ascii(iri->host)) != iri->host) {
			iri->host = p;
			iri->host_allocated = 1;
		}
	}
	else {
		if (iri->scheme == MGET_IRI_SCHEME_HTTP || iri->scheme == MGET_IRI_SCHEME_HTTPS) {
			error_printf(_("Missing host/domain in URI '%s'\n"), iri->uri);
			mget_iri_free(&iri);
			return NULL;
		}
	}

/*
	debug_printf("scheme=%s\n",iri->scheme);
	debug_printf("host=%s\n",iri->host);
	debug_printf("path=%s\n",iri->path);
	debug_printf("query=%s\n",iri->query);
	debug_printf("fragment=%s\n",iri->fragment);
*/

	return iri;
}
Exemplo n.º 9
0
int main(int argc, const char *const *argv)
{
    // Base URI for converting relative to absolute URIs
    const char *
    base = "http://www.example.com";

    // We assume that base is encoded in the local charset.
    const char *
    local_encoding = mget_local_charset_encoding();

    // parsed 'base'
    mget_iri_t
    *base_uri;

    // Character encoding of CSS file content
    // An HTTP response may contain the encoding in the Content-Type header,
    // but if
    // see http://stackoverflow.com/questions/2526033/why-specify-charset-utf-8-in-your-css-file
    const char *
    css_encoding = NULL;

    int
    argpos;

    // We want the libmget error messages be printed to STDERR.
    // From here on, we can call mget_error_printf, etc.
    mget_logger_set_stream(mget_get_logger(MGET_LOGGER_ERROR), stderr);

    // We want the libmget info messages be printed to STDOUT.
    // From here on, we can call mget_info_printf, etc.
    mget_logger_set_stream(mget_get_logger(MGET_LOGGER_INFO), stdout);

    // parse options
    for (argpos = 1; argpos < argc; argpos++) {
        if (!strcmp(argv[argpos], "--base") && argc - argpos > 1) {
            base = argv[++argpos];
            info_printf("Base URL encoding = '%s'\n", local_encoding);
        } else if (!strcmp(argv[argpos], "--encoding") && argc - argpos > 1) {
            css_encoding = argv[++argpos];
        } else if (!strcmp(argv[argpos], "--")) {
            argpos++;
            break;
        } else if (argv[argpos][0] == '-') {
            usage(argv[0]);
        } else
            break;
    }

    // All URIs are converted into UTF-8 charset.
    // That's why we need the local encoding (aka 'encoding of base URI') here.
    base_uri = mget_iri_parse(base, local_encoding);

    for (; argpos < argc; argpos++) {
        // use '-' as filename for STDIN
        css_parse_localfile(argv[argpos], base_uri, css_encoding);
    }

    mget_iri_free(&base_uri);

    return 0;
}
Exemplo n.º 10
0
MGET_IRI *mget_iri_parse(const char *s_uri, const char *encoding)
{
	MGET_IRI *iri;
	const char *default_port = NULL;
	char *p, *s, *authority, c;
	size_t slen, it;

	if (!s_uri)
		return NULL;

	/*
		URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
		hier-part   = "//" authority path-abempty / path-absolute / path-rootless / path-empty
		scheme      =  ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
	 */
	while (isspace(*s_uri)) s_uri++;
	if (!*s_uri) return NULL;

	// just use one block of memory for all parsed URI parts
	slen = strlen(s_uri);
	iri = xmalloc(sizeof(MGET_IRI) + slen * 2 + 2);
	memset(iri, 0, sizeof(MGET_IRI));
	strcpy(((char *)iri) + sizeof(MGET_IRI), s_uri);
	iri->uri = ((char *)iri) + sizeof(MGET_IRI);
	s = ((char *)iri) + sizeof(MGET_IRI) + slen + 1;
	strcpy(s, s_uri);

	p = s;
	while (*s && !_iri_isgendelim(*s))
		s++;

	if (*s == ':' && s[1]=='/') {
		// found a scheme
		*s++ = 0;

		// find the scheme in our static list of supported schemes
		// for later comparisons we compare pointers (avoiding strcasecmnp())
		iri->scheme = p;
		for (it = 0; iri_schemes[it]; it++) {
			if (!strcasecmp(iri_schemes[it], p)) {
				iri->scheme = iri_schemes[it];
				default_port = iri_ports[it];
				break;
			}
		}

		if (iri->scheme == p) {
			// convert scheme to lowercase
			for (; *p; p++)
				if (isupper(*p))
					*p = tolower(*p);
		}

	} else {
		iri->scheme = IRI_SCHEME_DEFAULT;
		default_port = iri_ports[0]; // port 80
		s = p; // rewind
	}

	// this is true for http, https, ftp, file
	if (s[0] == '/' && s[1] == '/')
		s += 2;

	// authority
	authority = s;
	while (*s && *s != '/' && *s != '?' && *s != '#')
		s++;
	c = *s;
	if (c) *s++ = 0;

	// left over: [path][?query][#fragment]
	if (c == '/') {
		iri->path = s;
		while (*s && *s != '?' && *s != '#')
			s++;
		c = *s;
		if (c) *s++ = 0;
	}

	if (c == '?') {
		iri->query = s;
		while (*s && *s != '#')
			s++;
		c = *s;
		if (c) *s++ = 0;
	}

	if (c == '#') {
		iri->fragment = s;
		while (*s)
			s++;
	}

	if (*s) {
		debug_printf("unparsed rest '%s'\n", s);
	}

	if (*authority) {
		s = authority;
		p = strchr(authority, '@');
		if (p) {
			iri->userinfo = s;
			*p = 0;
			s = p + 1;
		}
		if (*s == '[') {
			p = strrchr(s, ']');
			if (p) {
				iri->host = s + 1;
				*p = 0;
				s = p + 1;
			} else {
				// something is broken
				iri->host = s + 1;
				while (*s) s++;
			}
		} else {
			iri->host = s;
			while (*s && *s != ':')
				s++;
		}
		if (*s == ':') {
			if (s[1]) {
				if (!default_port || (strcmp(s + 1, default_port) && atoi(s + 1) != atoi(default_port)))
					iri->port = s + 1;
			}
		}
		*s = 0;
/*
		for (p = (char *)iri->host; *p; p++)
			if (*p >= 'A' && *p <= 'Z') // isupper() also returns true for chars > 0x7f, the test is not EBCDIC compatible ;-)
				*p = tolower(*p);
*/
 	}

	iri->resolv_port = iri->port ? iri->port : default_port;

	// now unescape all components (not interested in display, userinfo, password
	if (iri->host) {
		const char *host_utf;
		char *p;

		_unescape((unsigned char *)iri->host);

		host_utf = mget_str_to_utf8(iri->host, encoding);

		if (host_utf) {
			char *host_asc = NULL;
			int rc;

			if ((rc = idna_to_ascii_8z(host_utf, &host_asc, IDNA_USE_STD3_ASCII_RULES)) == IDNA_SUCCESS) {
				// log_printf("toASCII '%s' -> '%s'\n", host_utf, host_asc);
				iri->host = host_asc;
				iri->host_allocated = 1;
			} else
				error_printf(_("toASCII failed (%d): %s\n"), rc, idna_strerror(rc));

			xfree(host_utf);
		}

		for (p = (char *)iri->host; *p; p++)
			if (*p >= 'A' && *p <= 'Z') // isupper() also returns true for chars > 0x7f, the test is not EBCDIC compatible ;-)
				*p = tolower(*p);
	}
	else {
		if (iri->scheme == IRI_SCHEME_HTTP || iri->scheme == IRI_SCHEME_HTTPS) {
			error_printf(_("Missing host/domain in URI '%s'\n"), iri->uri);
			mget_iri_free(&iri);
			return NULL;
		}
	}
	if (iri->path)
		_unescape((unsigned char *)iri->path);
	if (iri->query)
		_unescape((unsigned char *)iri->query);
	if (iri->fragment)
		_unescape((unsigned char *)iri->fragment);

//	info_printf("%s: path '%s'\n", iri->uri, iri->path);

	return iri;
}