Ejemplo n.º 1
0
int mget_cookie_load_public_suffixes(const char *fname)
{
	PUBLIC_SUFFIX suffix, *suffixp;
	FILE *fp;
	int nsuffixes = 0;
	char *buf = NULL, *linep, *p;
	size_t bufsize = 0;
	ssize_t buflen;

	// as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules
	// and 40 exceptions.
	if (!suffixes)
		suffixes = mget_vector_create(6*1024, -2, (int(*)(const void *, const void *))suffix_compare);
	if (!suffix_exceptions)
		suffix_exceptions = mget_vector_create(64, -2, (int(*)(const void *, const void *))suffix_compare);

	if ((fp = fopen(fname, "r"))) {
		while ((buflen = mget_getline(&buf, &bufsize, fp)) >= 0) {
			linep = buf;

			while (isspace(*linep)) linep++; // ignore leading whitespace
			if (!*linep) continue; // skip empty lines

			if (*linep == '/' && linep[1] == '/')
				continue; // skip comments

			// parse suffix rule
			for (p = linep; *linep && !isspace(*linep);) linep++;
			*linep = 0;
			if (*p == '!') {
				// add to exceptions
				suffix_init(&suffix, p + 1, linep - p - 1);
				suffixp = mget_vector_get(suffix_exceptions, mget_vector_add(suffix_exceptions, &suffix, sizeof(suffix)));
			} else {
				suffix_init(&suffix, p, linep - p);
				suffixp = mget_vector_get(suffixes, mget_vector_add(suffixes, &suffix, sizeof(suffix)));
			}

			if (suffixp)
				suffixp->label = suffixp->label_buf; // set label to changed address

			nsuffixes++;;
		}

		xfree(buf);
		fclose(fp);

		mget_vector_sort(suffix_exceptions);
		mget_vector_sort(suffixes);

	} else
		error_printf(_("Failed to open public suffix file '%s'\n"), fname);

	return nsuffixes;
}
Ejemplo n.º 2
0
static void _rss_get_url(void *context, int flags, const char *dir, const char *attr, const char *val, size_t len, size_t pos G_GNUC_MGET_UNUSED)
{
	struct rss_context *ctx = context;
	mget_string_t url;

	if (!val || !len)
		return;

	url.p = NULL;

	if ((flags & XML_FLG_ATTRIBUTE)) {
		if (!mget_strcasecmp_ascii(attr, "url") || !mget_strcasecmp_ascii(attr, "href")
			|| !mget_strcasecmp_ascii(attr, "src") || !mget_strcasecmp_ascii(attr, "domain")
			|| !mget_strcasecmp_ascii(attr, "xmlns") || !mget_strncasecmp_ascii(attr, "xmlns:", 6))
		{
			for (;len && isspace(*val); val++, len--); // skip leading spaces
			for (;len && isspace(val[len - 1]); len--);  // skip trailing spaces

			url.p = val;
			url.len = len;

			if (!ctx->urls)
				ctx->urls = mget_vector_create(32, -2, NULL);

			mget_vector_add(ctx->urls, &url, sizeof(url));
		}
	}
	else if ((flags & XML_FLG_CONTENT)) {
		const char *elem = strrchr(dir, '/');

		if (elem) {
			elem++;

			if (!mget_strcasecmp_ascii(elem, "guid") || !mget_strcasecmp_ascii(elem, "link")
				 || !mget_strcasecmp_ascii(elem, "comments") || !mget_strcasecmp_ascii(elem, "docs"))
			{
				for (;len && isspace(*val); val++, len--); // skip leading spaces
				for (;len && isspace(val[len - 1]); len--);  // skip trailing spaces

				// debug_printf("#2 %02X %s %s '%.*s' %zd\n", flags, dir, attr, (int) len, val, len);

				url.p = val;
				url.len = len;

				if (!ctx->urls)
					ctx->urls = mget_vector_create(32, -2, NULL);

				mget_vector_add(ctx->urls, &url, sizeof(url));
			}
		}
	}
}
Ejemplo n.º 3
0
void mget_cookie_store_cookie(MGET_COOKIE *cookie)
{
	MGET_COOKIE *old;
	int pos;

	debug_printf("got cookie %s=%s\n", cookie->name, cookie->value);

	if (!cookie->normalized)
		return;

	pthread_mutex_lock(&cookies_mutex);

	if (!cookies) {
		cookies = mget_vector_create(128, -2, (int(*)(const void *, const void *))compare_cookie);
		old = NULL;
	} else
		old = mget_vector_get(cookies, pos = mget_vector_find(cookies, cookie));

	if (old) {
		debug_printf("replace old cookie %s=%s\n", cookie->name, cookie->value);
		cookie->creation = old->creation;
		mget_cookie_free_cookie(old);
		mget_vector_replace(cookies, cookie, sizeof(*cookie), pos);
	} else {
		debug_printf("store new cookie %s=%s\n", cookie->name, cookie->value);
		mget_vector_insert_sorted(cookies, cookie, sizeof(*cookie));
	}

	pthread_mutex_unlock(&cookies_mutex);
}
Ejemplo n.º 4
0
Archivo: cookie.c Proyecto: BIllli/mget
mget_cookie_db_t *mget_cookie_db_init(mget_cookie_db_t *cookie_db)
{
	if (!cookie_db)
		cookie_db = xmalloc(sizeof(mget_cookie_db_t));

	memset(cookie_db, 0, sizeof(*cookie_db));
	cookie_db->cookies = mget_vector_create(32, -2, (int(*)(const void *, const void *))_compare_cookie);
	mget_vector_set_destructor(cookie_db->cookies, (void(*)(void *))mget_cookie_deinit);
	mget_thread_mutex_init(&cookie_db->mutex);
#ifdef WITH_LIBPSL
	cookie_db->psl = (psl_ctx_t *)psl_builtin();
#endif

	return cookie_db;
}
Ejemplo n.º 5
0
Archivo: test.c Proyecto: BIllli/mget
static void test_vector(void)
{
	struct ENTRY
		*tmp,
		txt_sorted[5] = { {""}, {"four"}, {"one"}, {"three"}, {"two"} },
		*txt[countof(txt_sorted)];
	mget_vector_t
		*v = mget_vector_create(2, -2, (int(*)(const void *, const void *))compare_txt);
	unsigned
		it;
	int
		n;

	// copy
	for (it = 0; it < countof(txt); it++)
		txt[it] = &txt_sorted[it];

	// shuffle txt
	for (it = 0; it < countof(txt); it++) {
		n = rand() % countof(txt);
		tmp = txt[n];
		txt[n] = txt[it];
		txt[it] = tmp;
	}

	for (it = 0; it < countof(txt); it++) {
		mget_vector_insert_sorted(v, txt[it], sizeof(struct ENTRY));
	}

	for (it = 0; it < countof(txt); it++) {
		struct ENTRY *e = mget_vector_get(v, it);
		if (!strcmp(e->txt,txt_sorted[it].txt))
			ok++;
		else
			failed++;
	}

	mget_vector_free(&v);
}
Ejemplo n.º 6
0
Archivo: job.c Proyecto: darnir/mget
void job_create_parts(JOB *job)
{
	PART part;
	mget_metalink_t *metalink;
	ssize_t fsize;
	int it;

	if (!job || !(metalink = job->metalink))
		return;

	memset(&part, 0, sizeof(PART));

	// create space to hold enough parts
	if (!job->parts)
		job->parts = mget_vector_create(mget_vector_size(metalink->pieces), 4, NULL);
	else
		mget_vector_clear(job->parts);

	fsize = metalink->size;

	for (it = 0; it < mget_vector_size(metalink->pieces); it++) {
		mget_metalink_piece_t *piece = mget_vector_get(metalink->pieces, it);

		if (fsize >= piece->length) {
			part.length = piece->length;
		} else {
			part.length = fsize;
		}

		part.id = it + 1;

		mget_vector_add(job->parts, &part, sizeof(PART));

		part.position += part.length;
		fsize -= piece->length;
	}
}
Ejemplo n.º 7
0
Archivo: test.c Proyecto: BIllli/mget
static void test_stringmap(void)
{
	mget_stringmap_t *m;
	char key[128], value[128], *val;
	int run, it;
	size_t valuesize;

	// the initial size of 16 forces the internal reshashing function to be called twice

	m = mget_stringmap_create(16);

	for (run = 0; run < 2; run++) {
		if (run) {
			mget_stringmap_clear(m);
			mget_stringmap_sethashfunc(m, hash_txt);
		}

		for (it = 0; it < 26; it++) {
			sprintf(key, "http://www.example.com/subdir/%d.html", it);
			valuesize = sprintf(value, "%d.html", it);
			if (mget_stringmap_put(m, key, value, valuesize + 1)) {
				failed++;
				info_printf("stringmap_put(%s) returns unexpected old value\n", key);
			} else ok++;
		}

		if ((it = mget_stringmap_size(m)) != 26) {
			failed++;
			info_printf("stringmap_size() returned %d (expected %d)\n", it, 26);
		} else ok++;

		// now, look up every single entry
		for (it = 0; it < 26; it++) {
			sprintf(key, "http://www.example.com/subdir/%d.html", it);
			sprintf(value, "%d.html", it);
			if (!(val = mget_stringmap_get(m, key))) {
				failed++;
				info_printf("stringmap_get(%s) didn't find entry\n", key);
			} else if (strcmp(val, value)) {
				failed++;
				info_printf("stringmap_get(%s) found '%s' (expected '%s')\n", key, val, value);
			} else ok++;
		}

		mget_stringmap_clear(m);

		if ((it = mget_stringmap_size(m)) != 0) {
			failed++;
			info_printf("stringmap_size() returned %d (expected 0)\n", it);
		} else ok++;

		for (it = 0; it < 26; it++) {
			sprintf(key, "http://www.example.com/subdir/%d.html", it);
			valuesize = sprintf(value, "%d.html", it);
			if (mget_stringmap_put(m, key, value, valuesize + 1)) {
				failed++;
				info_printf("stringmap_put(%s) returns unexpected old value\n", key);
			} else ok++;
		}

		if ((it = mget_stringmap_size(m)) != 26) {
			failed++;
			info_printf("stringmap_size() returned %d (expected %d)\n", it, 26);
		} else ok++;

		// now, remove every single entry
		for (it = 0; it < 26; it++) {
			sprintf(key, "http://www.example.com/subdir/%d.html", it);
			sprintf(value, "%d.html", it);
			mget_stringmap_remove(m, key);
		}

		if ((it = mget_stringmap_size(m)) != 0) {
			failed++;
			info_printf("stringmap_size() returned %d (expected 0)\n", it);
		} else ok++;

		for (it = 0; it < 26; it++) {
			sprintf(key, "http://www.example.com/subdir/%d.html", it);
			valuesize = sprintf(value, "%d.html", it);
			if (mget_stringmap_put(m, key, value, valuesize + 1)) {
				failed++;
				info_printf("stringmap_put(%s) returns unexpected old value\n", key);
			} else ok++;
		}

		if ((it = mget_stringmap_size(m)) != 26) {
			failed++;
			info_printf("stringmap_size() returned %d (expected %d)\n", it, 26);
		} else ok++;
	}

	// testing alloc/free in stringmap/hashmap
	mget_stringmap_clear(m);
	mget_stringmap_put(m, "thekey", NULL, 0) ? failed++ : ok++;
	mget_stringmap_put(m, "thekey", NULL, 0) ? ok++ : failed++;
	mget_stringmap_put(m, "thekey", "thevalue", 9) ? ok++ : failed++;
	mget_stringmap_put(m, "thekey", "thevalue", 9) ? ok++ : failed++;
	mget_stringmap_put(m, "thekey", NULL, 0) ? ok++ : failed++;

	// testing key/value identity alloc/free in stringmap/hashmap
	mget_stringmap_clear(m);
	mget_stringmap_put(m, "thekey", NULL, 0) ? failed++ : ok++;
	mget_stringmap_put(m, "thekey", NULL, 0) ? ok++ : failed++;
	mget_stringmap_put(m, "thekey", "thevalue", 9) ? ok++ : failed++;
	mget_stringmap_put(m, "thekey", NULL, 0) ? ok++ : failed++;

	mget_stringmap_free(&m);

	mget_http_challenge_t challenge;
	mget_http_parse_challenge("Basic realm=\"test realm\"", &challenge);
	mget_http_free_challenge(&challenge);

	mget_vector_t *challenges;
	challenges = mget_vector_create(2, 2, NULL);
	mget_vector_set_destructor(challenges, (void(*)(void *))mget_http_free_challenge);
	mget_http_parse_challenge("Basic realm=\"test realm\"", &challenge);
	mget_vector_add(challenges, &challenge, sizeof(challenge));
	mget_http_free_challenges(&challenges);

	char *response_text = strdup(
"HTTP/1.1 401 Authorization Required\r\n"\
"Date: Sun, 23 Dec 2012 21:03:45 GMT\r\n"\
"Server: Apache/2.2.22 (Debian)\r\n"\
"WWW-Authenticate: Digest realm=\"therealm\", nonce=\"Ip6MaovRBAA=c4af733c51270698260f5d357724c2cbce20fa3d\", algorithm=MD5, domain=\"/prot_digest_md5\", qop=\"auth\"\r\n"\
"Vary: Accept-Encoding\r\n"\
"Content-Length: 476\r\n"\
"Keep-Alive: timeout=5, max=99\r\n"\
"Connection: Keep-Alive\r\n"\
"Content-Type: text/html; charset=iso-8859-1\r\n\r\n");

	mget_iri_t *iri = mget_iri_parse("http://localhost/prot_digest_md5/", NULL);
	mget_http_request_t *req = mget_http_create_request(iri, "GET");
	mget_http_response_t *resp = mget_http_parse_response_header(response_text);
	mget_http_add_credentials(req, mget_vector_get(resp->challenges, 0), "tim", "123");
//	for (it=0;it<vec_size(req->lines);it++) {
//		info_printf("%s\n", (char *)vec_get(req->lines, it));
//	}
	mget_http_free_response(&resp);
	mget_http_free_request(&req);
	mget_iri_free(&iri);
	xfree(response_text);

// Authorization: Digest username="******", realm="therealm", nonce="Ip6MaovRBAA=c4af733c51270698260f5d357724c2cbce20fa3d", uri="/prot_digest_md5/", response="a99e2012d507a73dd46eb044d3f4641c", qop=auth, nc=00000001, cnonce="3d20faa1"

}
Ejemplo n.º 8
0
Archivo: test.c Proyecto: BIllli/mget
static void test_parse_challenge(void)
{
	static const struct test_data {
		const char *
			input;
		const char *
			scheme[3];
	} test_data[] = {
		{	// simplebasic
			"Basic realm=\"foo\"",
			{ "Basic", NULL }
		},
		{	// simplebasicucase
			"BASIC REALM=\"foo\"",
			{ "Basic", NULL }
		},
		{	// simplebasicucase
			"Basic , realm=\"foo\"",
			{ "Basic", NULL }
		},
		{	//
			"Basic realm=\"test realm\"",
			{ "Basic", NULL }
		},
		{	//
			"Basic realm=\"test-äöÜ\"",
			{ "Basic", NULL }
		},
		{	//
			"Basic realm=\"basic\", Newauth realm=\"newauth\"",
			{ "Basic", "Newauth", NULL }
		},
	};

	mget_vector_t *challenges;
	mget_http_challenge_t *challenge;

	// Testcases found here http://greenbytes.de/tech/tc/httpauth/
	challenges = mget_vector_create(2, 2, NULL);
	mget_vector_set_destructor(challenges, (void(*)(void *))mget_http_free_challenge);

	for (unsigned it = 0; it < countof(test_data); it++) {
		const struct test_data *t = &test_data[it];

		mget_http_parse_challenges(t->input, challenges);
		for (unsigned nchal = 0; nchal < countof(test_data[0].scheme) && t->scheme[nchal]; nchal++) {
			challenge = mget_vector_get(challenges, nchal);

			if (!t->scheme[nchal]) {
				if (challenge) {
					failed++;
					info_printf("Failed [%u]: mget_http_parse_challenges(%s) found %d challenges (expected %d)\n", it, t->input, mget_vector_size(challenges), nchal);
				}
				break;
			}

			if (!challenge) {
				failed++;
				info_printf("Failed [%u]: mget_http_parse_challenges(%s) did not find enough challenges\n", it, t->input);
				break;
			}

			if (!mget_strcasecmp_ascii(challenge->auth_scheme, t->scheme[nchal])) {
				ok++;
			} else {
				failed++;
				info_printf("Failed [%u]: mget_http_parse_challenges(%s) -> '%s' (expected '%s')\n", it, t->input, challenge->auth_scheme, t->scheme[nchal]);
			}
		}

		mget_vector_clear(challenges);
	}

	mget_http_free_challenges(&challenges);
}
Ejemplo n.º 9
0
Archivo: job.c Proyecto: darnir/mget
int job_validate_file(JOB *job)
{
	PART part;
	mget_metalink_t *metalink;
	off_t fsize;
	int fd, rc = -1, it;
	struct stat st;

	if (!job || !(metalink = job->metalink))
		return 0;

	memset(&part, 0, sizeof(PART));

	// create space to hold enough parts
	if (!job->parts)
		job->parts = mget_vector_create(mget_vector_size(metalink->pieces), 4, NULL);
	else
		mget_vector_clear(job->parts);

	fsize = metalink->size;

	if (mget_vector_size(metalink->hashes) == 0) {
		// multipart non-metalink download: do not clobber if file has expected size
		if (stat(metalink->name, &st) == 0 && st.st_size == fsize) {
			return 1; // we are done
		}
	}

	// truncate file if needed
	if (stat(metalink->name, &st) == 0 && st.st_size > fsize) {
		if (truncate(metalink->name, fsize) == -1)
			error_printf(_("Failed to truncate %s\n from %llu to %llu bytes\n"),
				metalink->name, (unsigned long long)st.st_size, (unsigned long long)fsize);
	}

	if ((fd = open(metalink->name, O_RDONLY)) != -1) {
		// file exists, check which piece is invalid and requeue it

		for (it = 0; errno != EINTR && it < mget_vector_size(metalink->hashes); it++) {
			mget_metalink_hash_t *hash = mget_vector_get(metalink->hashes, it);

			if ((rc = check_file_fd(hash, fd)) == -1)
				continue; // hash type not available, try next

			break;
		}

		if (rc == 1) {
			info_printf(_("Checksum OK for '%s'\n"), metalink->name);
			return 1; // we are done
		}
		else if (rc == -1) {
			// failed to check file, continue as if file is ok
			info_printf(_("Failed to build checksum, assuming file to be OK\n"));
			return 1; // we are done
		} else
			info_printf(_("Bad checksum for '%s'\n"), metalink->name);

//		if (vec_size(metalink->pieces) < 1)
//			return;

		for (it = 0; errno != EINTR && it < mget_vector_size(metalink->pieces); it++) {
			mget_metalink_piece_t *piece = mget_vector_get(metalink->pieces, it);
			mget_metalink_hash_t *hash = &piece->hash;

			if (fsize >= piece->length) {
				part.length = piece->length;
			} else {
				part.length = (size_t)fsize;
			}

			part.id = it + 1;

			if ((rc = check_piece_hash(hash, fd, part.position, part.length)) != 1) {
				info_printf(_("Piece %d/%d not OK - requeuing\n"), it + 1, mget_vector_size(metalink->pieces));
				mget_vector_add(job->parts, &part, sizeof(PART));
				debug_printf("  need to download %llu bytes from pos=%llu\n",
					(unsigned long long)part.length, (unsigned long long)part.position);
			}

			part.position += part.length;
			fsize -= piece->length;
		}
		close(fd);
	} else {
		for (it = 0; it < mget_vector_size(metalink->pieces); it++) {
			mget_metalink_piece_t *piece = mget_vector_get(metalink->pieces, it);

			if (fsize >= piece->length) {
				part.length = piece->length;
			} else {
				part.length = fsize;
			}

			part.id = it + 1;

			mget_vector_add(job->parts, &part, sizeof(PART));

			part.position += part.length;
			fsize -= piece->length;
		}
	}

	return 0;
}
Ejemplo n.º 10
0
Archivo: robots.c Proyecto: BIllli/mget
ROBOTS *mget_robots_parse(const char *data)
{
	ROBOTS *robots;
	ROBOTS_PATH path;
	int collect = 0;
	const char *p;

	if (!data || !*data)
		return NULL;

	robots = xcalloc(1, sizeof (ROBOTS));

	do {
		if (collect < 2 && !strncasecmp(data, "User-agent:", 11)) {
			if (!collect) {
				for (data += 11; *data == ' ' || *data == '\t'; data++);
				if (!strncasecmp(data, "mget", 4)) {
					collect = 1;
				}
				else if (*data == '*') {
					collect = 1;
				}
			} else
				collect = 2;
		}
		else if (collect == 1 && !strncasecmp(data, "Disallow:", 9)) {
			for (data += 9; *data == ' ' || *data == '\t'; data++);
			if (*data == '\r' || *data == '\n' || !*data) {
				// all allowed
				mget_vector_free(&robots->paths);
				collect = 2;
			} else {
				if (!robots->paths) {
					robots->paths = mget_vector_create(32, -2, NULL);
					mget_vector_set_destructor(robots->paths, (void(*)(void *))_free_path);
				}
				for (p = data; !isspace(*p); p++);
				path.len = p - data;
				path.path = strndup(data, path.len);
				mget_vector_add(robots->paths, &path, sizeof(path));
			}
		}
		else if (!strncasecmp(data, "Sitemap:", 8)) {
			for (data += 8; *data==' ' || *data == '\t'; data++);
			for (p = data; !isspace(*p); p++);

			if (!robots->sitemaps)
				robots->sitemaps = mget_vector_create(4, -2, NULL);
			mget_vector_add_noalloc(robots->sitemaps, strndup(data, p - data));
		}

		if ((data = strchr(data, '\n')))
			data++; // point to next line
	} while (data && *data);

/*
	for (int it = 0; it < mget_vector_size(robots->paths); it++) {
		ROBOTS_PATH *path = mget_vector_get(robots->paths, it);
		info_printf("path '%s'\n", path->path);
	}
	for (int it = 0; it < mget_vector_size(robots->sitemaps); it++) {
		const char *sitemap = mget_vector_get(robots->sitemaps, it);
		info_printf("sitemap '%s'\n", sitemap);
	}
*/

	return robots;
}