Пример #1
0
// Collects URL candidates from an Atom document into ctx->urls.
// (Presumably registered as an XML parser callback - confirm against the caller.)
//
// context: struct atom_context whose 'urls' vector receives the entries
// flags:   XML_FLG_ATTRIBUTE or XML_FLG_CONTENT select which rule set applies
// dir:     element path; only the part after the last '/' is examined
// attr:    attribute name (examined only for attribute events)
// val/len: the value text; surrounding whitespace is trimmed before storing
// pos:     unused
//
// The stored entry holds the trimmed pointer and length, so it refers to the
// memory 'val' points into rather than to a private copy of the text.
static void _atom_get_url(void *context, int flags, const char *dir, const char *attr, const char *val, size_t len, size_t pos G_GNUC_WGET_UNUSED)
{
	struct atom_context *ctx = context;
	wget_string_t url;
	int wanted = 0;

	if (!val || !len)
		return;

	if (flags & XML_FLG_ATTRIBUTE) {
		// attributes that carry a URL
		wanted = !wget_strcasecmp_ascii(attr, "href")
			|| !wget_strcasecmp_ascii(attr, "uri")
			|| !wget_strcasecmp_ascii(attr, "src")
			|| !wget_strcasecmp_ascii(attr, "scheme")
			|| !wget_strcasecmp_ascii(attr, "xmlns")
			|| !wget_strncasecmp_ascii(attr, "xmlns:", 6);
	} else if (flags & XML_FLG_CONTENT) {
		// elements whose text content is a URL
		const char *slash = strrchr(dir, '/');

		if (slash) {
			const char *elem = slash + 1;

			wanted = !wget_strcasecmp_ascii(elem, "icon")
				|| !wget_strcasecmp_ascii(elem, "id")
				|| !wget_strcasecmp_ascii(elem, "logo");
		}
	}

	if (!wanted)
		return;

	// strip leading whitespace
	while (len && c_isspace(*val)) {
		val++;
		len--;
	}

	// strip trailing whitespace
	while (len && c_isspace(val[len - 1]))
		len--;

	url.p = val;
	url.len = len;

	if (!ctx->urls)
		ctx->urls = wget_vector_create(32, -2, NULL);

	wget_vector_add(ctx->urls, &url, sizeof(url));
}
Пример #2
0
// Appends the hash of one file piece to metalink->pieces.
//
// ctx:   parser context; reads ctx->length and ctx->hash_type, uses ctx->hash
//        as scratch space for the scanned hash and clears it before returning
// value: text containing the hash; the first whitespace-delimited token
//        (at most 127 characters) is taken as the hex hash
//
// A piece is only recorded when length, hash type and hash are all non-empty.
// Its position is the end (position + length) of the previously added piece,
// or 0 for the first piece.
static void _add_piece(_metalink_context_t *ctx, const char *value)
{
	wget_metalink_t *metalink = ctx->metalink;

	sscanf(value, "%127s", ctx->hash);

	if (ctx->length && *ctx->hash_type && *ctx->hash) {
		// hash for a piece of the file
		wget_metalink_piece_t piece, *piecep;

		// Zero the whole struct before filling it: the struct is handed to the
		// vector with its full size, and strlcpy() leaves the bytes after the
		// terminating NUL untouched, so without this the stored piece would
		// contain indeterminate stack bytes.
		memset(&piece, 0, sizeof(piece));

		if (!metalink->pieces)
			metalink->pieces = wget_vector_create(32, 32, NULL);

		piece.length = ctx->length;
		strlcpy(piece.hash.type, ctx->hash_type, sizeof(piece.hash.type));
		strlcpy(piece.hash.hash_hex, ctx->hash, sizeof(piece.hash.hash_hex));

		// look at the last stored piece (if any) to derive this piece's offset
		piecep = wget_vector_get(metalink->pieces, wget_vector_size(metalink->pieces) - 1);
		if (piecep)
			piece.position = piecep->position + piecep->length;
		else
			piece.position = 0;
		wget_vector_add(metalink->pieces, &piece, sizeof(wget_metalink_piece_t));
	}

	*ctx->hash = 0;
}
Пример #3
0
// Adds a download mirror to metalink->mirrors.
//
// ctx:   parser context; ctx->location and ctx->priority describe the mirror
//        being added and are reset to their defaults before returning
// value: mirror URL; only values starting with "http:" or "https:"
//        (ASCII case-insensitive) that wget_iri_parse() accepts are stored
//
// The location/priority reset happens on every path, including rejected
// mirrors, so values belonging to a skipped mirror cannot be picked up by
// the next one.
static void _add_mirror(_metalink_context_t *ctx, const char *value)
{
	if (!wget_strncasecmp_ascii(value, "http:", 5) || !wget_strncasecmp_ascii(value, "https:", 6)) {
		wget_metalink_t *metalink = ctx->metalink;
		wget_metalink_mirror_t mirror;

		memset(&mirror, 0, sizeof(wget_metalink_mirror_t));
		mirror.iri = wget_iri_parse(value, NULL);

		if (mirror.iri) {
			strlcpy(mirror.location, ctx->location, sizeof(mirror.location));
			mirror.priority = ctx->priority;

			if (!metalink->mirrors) {
				metalink->mirrors = wget_vector_create(4, 4, NULL);
				wget_vector_set_destructor(metalink->mirrors, (void(*)(void *))_free_mirror);
			}
			wget_vector_add(metalink->mirrors, &mirror, sizeof(wget_metalink_mirror_t));
		}
	}

	// reset the per-mirror state, whether or not the mirror was accepted
	*ctx->location = 0;
	ctx->priority = 999999;
}
Пример #4
0
// CSS parser callback, invoked once for every URI found.
// Appends an entry to ctx->uris holding a duplicate of the URI text plus the
// length and position reported by the parser; abs_url starts out as NULL.
static void _css_get_url(void *context, const char *url, size_t len, size_t pos)
{
	_CSS_CONTEXT *ctx = context;
	WGET_PARSED_URL entry = {
		.url = wget_strmemdup(url, len),
		.abs_url = NULL,
		.len = len,
		.pos = pos
	};

	// create the result vector lazily, on the first URI
	if (ctx->uris == NULL) {
		ctx->uris = wget_vector_create(16, -2, NULL);
		wget_vector_set_destructor(ctx->uris, (wget_vector_destructor_t)_free_url);
	}

	wget_vector_add(ctx->uris, &entry, sizeof(entry));
}

// Resolves every entry of 'urls' against 'base' and stores the result in the
// entry's abs_url field. Entries that cannot be resolved are reported via
// error_printf() and keep their previous abs_url.
// Does nothing when either argument is NULL.
static void _urls_to_absolute(wget_vector_t *urls, wget_iri_t *base)
{
	wget_buffer_t scratch;

	if (!base || !urls)
		return;

	wget_buffer_init(&scratch, NULL, 1024);

	for (int idx = 0; idx < wget_vector_size(urls); idx++) {
		WGET_PARSED_URL *entry = wget_vector_get(urls, idx);

		if (!wget_iri_relative_to_abs(base, entry->url, entry->len, &scratch)) {
			error_printf("Cannot resolve relative URI '%s'\n", entry->url);
			continue;
		}

		entry->abs_url = wget_strmemdup(scratch.data, scratch.length);
	}

	wget_buffer_deinit(&scratch);
}

// Parses 'len' bytes of CSS and returns the vector of URIs found in it.
//
// css/len:  the CSS text to parse
// base:     if non-NULL, each found URI is additionally resolved against it
// encoding: if non-NULL, the encoding callback is installed and given this
//           pointer through the parse context
//
// Returns the vector of found URIs, or NULL if none was found.
wget_vector_t *wget_css_get_urls(const char *css, size_t len, wget_iri_t *base, const char **encoding)
{
	_CSS_CONTEXT context = { .encoding = encoding };
	wget_vector_t *found;

	// the encoding callback is only installed when the caller asked for it
	if (encoding)
		wget_css_parse_buffer(css, len, _css_get_url, _css_get_encoding, &context);
	else
		wget_css_parse_buffer(css, len, _css_get_url, NULL, &context);

	found = context.uris;
	_urls_to_absolute(found, base);

	return found;
}
Пример #5
0
// Appends a hash covering the complete file to metalink->hashes.
//
// ctx:   parser context; reads ctx->hash_type, uses ctx->hash as scratch
//        space, and clears both before returning
// value: text containing the hash; the first whitespace-delimited token
//        (at most 127 characters) is taken as the hex hash
//
// Nothing is stored unless both the hash type and the hash are non-empty.
static void _add_file_hash(_metalink_context_t *ctx, const char *value)
{
	sscanf(value, "%127s", ctx->hash);

	if (ctx->hash_type[0] != '\0' && ctx->hash[0] != '\0') {
		wget_metalink_t *ml = ctx->metalink;
		wget_metalink_hash_t entry;

		memset(&entry, 0, sizeof(entry));
		strlcpy(entry.type, ctx->hash_type, sizeof(entry.type));
		strlcpy(entry.hash_hex, ctx->hash, sizeof(entry.hash_hex));

		// the hash list is created on first use
		if (ml->hashes == NULL)
			ml->hashes = wget_vector_create(4, 4, NULL);

		wget_vector_add(ml->hashes, &entry, sizeof(entry));
	}

	// both scratch fields are consumed now
	ctx->hash[0] = 0;
	ctx->hash_type[0] = 0;
}
Пример #6
0
// Inserts five entries into a sorted vector in pseudo-random order and checks
// that reading the vector back front to back yields them in sorted order.
// Each position counts once towards the global 'ok' or 'failed' counter.
static void test_vector(void)
{
	struct ENTRY sorted[5] = { {""}, {"four"}, {"one"}, {"three"}, {"two"} };
	struct ENTRY *shuffled[countof(sorted)];
	wget_vector_t *vec = wget_vector_create(2, -2, (int(*)(const void *, const void *))compare_txt);
	unsigned idx;

	// start from the sorted order ...
	for (idx = 0; idx < countof(shuffled); idx++)
		shuffled[idx] = sorted + idx;

	// ... and swap every slot with a randomly picked one
	for (idx = 0; idx < countof(shuffled); idx++) {
		int pick = rand() % countof(shuffled);
		struct ENTRY *swap = shuffled[pick];

		shuffled[pick] = shuffled[idx];
		shuffled[idx] = swap;
	}

	for (idx = 0; idx < countof(shuffled); idx++)
		wget_vector_insert_sorted(vec, shuffled[idx], sizeof(struct ENTRY));

	// the vector must now hand the entries back in sorted order
	for (idx = 0; idx < countof(shuffled); idx++) {
		struct ENTRY *entry = wget_vector_get(vec, idx);

		if (strcmp(entry->txt, sorted[idx].txt) == 0)
			ok++;
		else
			failed++;
	}

	wget_vector_free(&vec);
}
Пример #7
0
// Fills job->parts with one PART per metalink piece.
//
// job: the job to prepare; nothing happens when job->metalink is NULL.
//      An existing job->parts vector is cleared and reused, otherwise a new
//      one is created.
//
// Parts are numbered from 1 and laid out back to back starting at position 0.
// Each part has the length of its piece, except that a part is limited to the
// number of bytes still remaining of metalink->size.
void job_create_parts(JOB *job)
{
	PART part;
	wget_metalink_t *metalink;
	// off_t (as in job_validate_file) so that large file sizes are not
	// truncated on platforms where ssize_t is narrower than off_t
	off_t fsize;

	if (!(metalink = job->metalink))
		return;

	memset(&part, 0, sizeof(PART));

	// create space to hold enough parts
	if (!job->parts)
		job->parts = wget_vector_create(wget_vector_size(metalink->pieces), 4, NULL);
	else
		wget_vector_clear(job->parts);

	fsize = metalink->size;

	for (int it = 0; it < wget_vector_size(metalink->pieces); it++) {
		wget_metalink_piece_t *piece = wget_vector_get(metalink->pieces, it);

		// the last piece may be shorter than the nominal piece length
		if (fsize >= piece->length) {
			part.length = piece->length;
		} else {
			part.length = fsize;
		}

		part.id = it + 1;

		wget_vector_add(job->parts, &part, sizeof(PART));

		// 'part' is reused: advance it to where the next part starts
		part.position += part.length;
		fsize -= piece->length;
	}
}
Пример #8
0
// Checks an already existing local file against the metalink description of
// the job and queues the parts that (still) have to be downloaded.
//
// job: the job to validate; job->parts is (re)populated with the parts that
//      need downloading.
//
// Returns
//   0 if job->metalink is NULL, or if parts have been queued in job->parts
//   1 if nothing has to be downloaded: the metalink has neither pieces nor
//     hashes, the file has the expected size and there are no file hashes,
//     the whole-file checksum matches, or no checksum could be built
//     (in which case the file is assumed to be OK)
//
// NOTE(review): the loops stop when errno == EINTR, but errno is not reset
// beforehand, so a value left over from earlier code is also honored -
// confirm whether that is intended.
int job_validate_file(JOB *job)
{
	PART part;
	wget_metalink_t *metalink;
	off_t fsize;
	int fd, rc = -1;
	struct stat st;

	if (!(metalink = job->metalink))
		return 0;

	memset(&part, 0, sizeof(PART));

	// Metalink may be used without pieces...
	// In that case a single piece covering the whole file is synthesized from
	// the first whole-file hash.
	if (!metalink->pieces) {
		wget_metalink_piece_t piece;
		wget_metalink_hash_t *hash = wget_vector_get(metalink->hashes, 0);

		if (!hash)
			return 1;

		// Zero the whole struct before filling it: it is handed to the vector
		// with its full size, and strlcpy() leaves the bytes after the
		// terminating NUL untouched.
		memset(&piece, 0, sizeof(piece));

		piece.length = metalink->size;
		piece.position = 0;
		strlcpy(piece.hash.type, hash->type, sizeof(piece.hash.type));
		strlcpy(piece.hash.hash_hex, hash->hash_hex, sizeof(piece.hash.hash_hex));

		metalink->pieces = wget_vector_create(1, 1, NULL);
		wget_vector_add(metalink->pieces, &piece, sizeof(wget_metalink_piece_t));
	}

	// create space to hold enough parts
	if (!job->parts)
		job->parts = wget_vector_create(wget_vector_size(metalink->pieces), 4, NULL);
	else
		wget_vector_clear(job->parts);

	fsize = metalink->size;

	if (wget_vector_size(metalink->hashes) == 0) {
		// multipart non-metalink download: do not clobber if file has expected size
		if (stat(metalink->name, &st) == 0 && st.st_size == fsize) {
			return 1; // we are done
		}
	}

	// truncate file if needed
	// (a failure is only reported; validation continues regardless)
	if (stat(metalink->name, &st) == 0 && st.st_size > fsize) {
		if (truncate(metalink->name, fsize) == -1)
			error_printf(_("Failed to truncate %s\n from %llu to %llu bytes\n"),
				metalink->name, (unsigned long long)st.st_size, (unsigned long long)fsize);
	}

	if ((fd = open(metalink->name, O_RDONLY)) != -1) {
		// file exists, check which piece is invalid and requeue it

		// First try the whole-file hashes: the first one whose type can be
		// computed decides (rc stays -1 when none can be).
		for (int it = 0; errno != EINTR && it < wget_vector_size(metalink->hashes); it++) {
			wget_metalink_hash_t *hash = wget_vector_get(metalink->hashes, it);

			if ((rc = _check_file_fd(hash, fd)) == -1)
				continue; // hash type not available, try next

			break;
		}

		if (rc == 1) {
			info_printf(_("Checksum OK for '%s'\n"), metalink->name);
			close(fd);
			return 1; // we are done
		}
		else if (rc == -1) {
			// failed to check file, continue as if file is ok
			info_printf(_("Failed to build checksum, assuming file to be OK\n"));
			close(fd);
			return 1; // we are done
		} else
			info_printf(_("Bad checksum for '%s'\n"), metalink->name);

		// Whole-file checksum is bad: check piece by piece and queue only
		// those pieces whose hash does not match.
		for (int it = 0; errno != EINTR && it < wget_vector_size(metalink->pieces); it++) {
			wget_metalink_piece_t *piece = wget_vector_get(metalink->pieces, it);
			wget_metalink_hash_t *hash = &piece->hash;

			// the last piece may be shorter than the nominal piece length
			if (fsize >= piece->length) {
				part.length = piece->length;
			} else {
				part.length = (size_t)fsize;
			}

			part.id = it + 1;

			if ((rc = check_piece_hash(hash, fd, part.position, part.length)) != 1) {
				info_printf(_("Piece %d/%d not OK - requeuing\n"), it + 1, wget_vector_size(metalink->pieces));
				wget_vector_add(job->parts, &part, sizeof(PART));
				debug_printf("  need to download %llu bytes from pos=%llu\n",
					(unsigned long long)part.length, (unsigned long long)part.position);
			}

			// 'part' is reused: advance it to where the next part starts
			part.position += part.length;
			fsize -= piece->length;
		}
		close(fd);
	} else {
		// file could not be opened: queue every piece for download
		for (int it = 0; it < wget_vector_size(metalink->pieces); it++) {
			wget_metalink_piece_t *piece = wget_vector_get(metalink->pieces, it);

			if (fsize >= piece->length) {
				part.length = piece->length;
			} else {
				part.length = fsize;
			}

			part.id = it + 1;

			wget_vector_add(job->parts, &part, sizeof(PART));

			part.position += part.length;
			fsize -= piece->length;
		}
	}

	return 0;
}
Пример #9
0
// Puts the 26 generated key/value pairs into the map.
// A put that reports an already existing entry counts as a failure.
static void stringmap_put_entries(wget_stringmap_t *m)
{
	char key[128], value[128];

	for (int idx = 0; idx < 26; idx++) {
		size_t valuesize;

		sprintf(key, "http://www.example.com/subdir/%d.html", idx);
		valuesize = sprintf(value, "%d.html", idx);

		if (wget_stringmap_put(m, key, value, valuesize + 1)) {
			failed++;
			info_printf("stringmap_put(%s) returns unexpected old value\n", key);
		} else {
			ok++;
		}
	}
}

// Counts one ok/failed depending on whether the map holds exactly 26 entries.
static void stringmap_check_size_26(wget_stringmap_t *m)
{
	int count = wget_stringmap_size(m);

	if (count != 26) {
		failed++;
		info_printf("stringmap_size() returned %d (expected %d)\n", count, 26);
	} else {
		ok++;
	}
}

// Counts one ok/failed depending on whether the map is empty.
static void stringmap_check_empty(wget_stringmap_t *m)
{
	int count = wget_stringmap_size(m);

	if (count != 0) {
		failed++;
		info_printf("stringmap_size() returned %d (expected 0)\n", count);
	} else {
		ok++;
	}
}

// Exercises the stringmap (put/get/clear/remove/size) in two runs, the second
// one with hash_txt installed as hash function, then checks the return values
// of put when replacing entries, and finally runs a few HTTP challenge,
// response-header and credential calls.
// Results are counted in the global 'ok' and 'failed' counters.
static void test_stringmap(void)
{
	wget_stringmap_t *map;
	char key[128], expected[128], *found;
	int pass, idx;

	// the initial size of 16 forces the internal rehashing function to be called twice
	map = wget_stringmap_create(16);

	for (pass = 0; pass < 2; pass++) {
		if (pass != 0) {
			wget_stringmap_clear(map);
			wget_stringmap_sethashfunc(map, hash_txt);
		}

		stringmap_put_entries(map);
		stringmap_check_size_26(map);

		// now, look up every single entry
		for (idx = 0; idx < 26; idx++) {
			sprintf(key, "http://www.example.com/subdir/%d.html", idx);
			sprintf(expected, "%d.html", idx);

			found = wget_stringmap_get(map, key);
			if (!found) {
				failed++;
				info_printf("stringmap_get(%s) didn't find entry\n", key);
			} else if (strcmp(found, expected) != 0) {
				failed++;
				info_printf("stringmap_get(%s) found '%s' (expected '%s')\n", key, found, expected);
			} else {
				ok++;
			}
		}

		// clearing must leave an empty map that can be refilled
		wget_stringmap_clear(map);
		stringmap_check_empty(map);

		stringmap_put_entries(map);
		stringmap_check_size_26(map);

		// now, remove every single entry
		for (idx = 0; idx < 26; idx++) {
			sprintf(key, "http://www.example.com/subdir/%d.html", idx);
			sprintf(expected, "%d.html", idx);
			wget_stringmap_remove(map, key);
		}
		stringmap_check_empty(map);

		// the map is left filled at the end of each run
		stringmap_put_entries(map);
		stringmap_check_size_26(map);
	}

	// testing alloc/free in stringmap/hashmap
	// (only the very first put of a key is expected to return 0)
	wget_stringmap_clear(map);
	if (wget_stringmap_put(map, "thekey", NULL, 0))
		failed++;
	else
		ok++;
	if (wget_stringmap_put(map, "thekey", NULL, 0))
		ok++;
	else
		failed++;
	if (wget_stringmap_put(map, "thekey", "thevalue", 9))
		ok++;
	else
		failed++;
	if (wget_stringmap_put(map, "thekey", "thevalue", 9))
		ok++;
	else
		failed++;
	if (wget_stringmap_put(map, "thekey", NULL, 0))
		ok++;
	else
		failed++;

	// testing key/value identity alloc/free in stringmap/hashmap
	wget_stringmap_clear(map);
	if (wget_stringmap_put(map, "thekey", NULL, 0))
		failed++;
	else
		ok++;
	if (wget_stringmap_put(map, "thekey", NULL, 0))
		ok++;
	else
		failed++;
	if (wget_stringmap_put(map, "thekey", "thevalue", 9))
		ok++;
	else
		failed++;
	if (wget_stringmap_put(map, "thekey", NULL, 0))
		ok++;
	else
		failed++;

	wget_stringmap_free(&map);

	// parse and free a single challenge
	wget_http_challenge_t challenge;
	wget_http_parse_challenge("Basic realm=\"test realm\"", &challenge);
	wget_http_free_challenge(&challenge);

	// parse a challenge, store it in a vector and free it via the vector
	wget_vector_t *challenges = wget_vector_create(2, 2, NULL);
	wget_vector_set_destructor(challenges, (void(*)(void *))wget_http_free_challenge);
	wget_http_parse_challenge("Basic realm=\"test realm\"", &challenge);
	wget_vector_add(challenges, &challenge, sizeof(challenge));
	wget_http_free_challenges(&challenges);

	// parse a 401 response header and add credentials for its first challenge
	char *response_text = strdup(
		"HTTP/1.1 401 Authorization Required\r\n"
		"Date: Sun, 23 Dec 2012 21:03:45 GMT\r\n"
		"Server: Apache/2.2.22 (Debian)\r\n"
		"WWW-Authenticate: Digest realm=\"therealm\", nonce=\"Ip6MaovRBAA=c4af733c51270698260f5d357724c2cbce20fa3d\", algorithm=MD5, domain=\"/prot_digest_md5\", qop=\"auth\"\r\n"
		"Vary: Accept-Encoding\r\n"
		"Content-Length: 476\r\n"
		"Keep-Alive: timeout=5, max=99\r\n"
		"Connection: Keep-Alive\r\n"
		"Content-Type: text/html; charset=iso-8859-1\r\n\r\n");

	wget_iri_t *iri = wget_iri_parse("http://localhost/prot_digest_md5/", NULL);
	wget_http_request_t *req = wget_http_create_request(iri, "GET");
	wget_http_response_t *resp = wget_http_parse_response_header(response_text);

	wget_http_add_credentials(req, wget_vector_get(resp->challenges, 0), "tim", "123");

	wget_http_free_response(&resp);
	wget_http_free_request(&req);
	wget_iri_free(&iri);
	xfree(response_text);

	// For reference, an Authorization header for the response above:
	// Authorization: Digest username="******", realm="therealm", nonce="Ip6MaovRBAA=c4af733c51270698260f5d357724c2cbce20fa3d", uri="/prot_digest_md5/", response="a99e2012d507a73dd46eb044d3f4641c", qop=auth, nc=00000001, cnonce="3d20faa1"
}
Пример #10
0
// Feeds a set of WWW-Authenticate style header values to
// wget_http_parse_challenges() and checks the auth scheme of every parsed
// challenge against the expected, NULL-terminated scheme list.
//
// Besides comparing the scheme names, the test also fails when the parser
// returns fewer or more challenges than the list contains.
// Results are counted in the global 'ok' and 'failed' counters.
static void test_parse_challenge(void)
{
	static const struct test_data {
		const char *
			input;
		const char *
			scheme[3];
	} test_data[] = {
		{	// simplebasic
			"Basic realm=\"foo\"",
			{ "Basic", NULL }
		},
		{	// simplebasicucase
			"BASIC REALM=\"foo\"",
			{ "Basic", NULL }
		},
		{	// comma between scheme and realm parameter
			"Basic , realm=\"foo\"",
			{ "Basic", NULL }
		},
		{	// realm containing a space
			"Basic realm=\"test realm\"",
			{ "Basic", NULL }
		},
		{	// realm containing non-ASCII characters
			"Basic realm=\"test-äöÜ\"",
			{ "Basic", NULL }
		},
		{	// two challenges in one header value
			"Basic realm=\"basic\", Newauth realm=\"newauth\"",
			{ "Basic", "Newauth", NULL }
		},
	};

	wget_vector_t *challenges;
	wget_http_challenge_t *challenge;

	// Testcases found here http://greenbytes.de/tech/tc/httpauth/
	challenges = wget_vector_create(2, 2, NULL);
	wget_vector_set_destructor(challenges, (void(*)(void *))wget_http_free_challenge);

	for (unsigned it = 0; it < countof(test_data); it++) {
		const struct test_data *t = &test_data[it];

		wget_http_parse_challenges(t->input, challenges);

		// The loop deliberately also visits the NULL terminator of the scheme
		// list, so that the check for surplus challenges below is reached.
		for (unsigned nchal = 0; nchal < countof(test_data[0].scheme); nchal++) {
			challenge = wget_vector_get(challenges, nchal);

			if (!t->scheme[nchal]) {
				// end of the expected list: there must be no further challenge
				if (challenge) {
					failed++;
					info_printf("Failed [%u]: wget_http_parse_challenges(%s) found %d challenges (expected %u)\n", it, t->input, wget_vector_size(challenges), nchal);
				}
				break;
			}

			if (!challenge) {
				failed++;
				info_printf("Failed [%u]: wget_http_parse_challenges(%s) did not find enough challenges\n", it, t->input);
				break;
			}

			if (!wget_strcasecmp_ascii(challenge->auth_scheme, t->scheme[nchal])) {
				ok++;
			} else {
				failed++;
				info_printf("Failed [%u]: wget_http_parse_challenges(%s) -> '%s' (expected '%s')\n", it, t->input, challenge->auth_scheme, t->scheme[nchal]);
			}
		}

		// start every test case with an empty vector
		wget_vector_clear(challenges);
	}

	wget_http_free_challenges(&challenges);
}