Esempio n. 1
0
/*
 * Record the checksum of one piece of the file.
 *
 * The first whitespace-delimited token of 'value' (at most 127 characters)
 * is read into ctx->hash. When the context holds a piece length, a hash type
 * and a hash, a piece entry is appended to the metalink's piece list. Its
 * position is the end of the previously stored piece, or 0 if the list was
 * empty. ctx->hash is emptied before returning in every case.
 */
static void _add_piece(_metalink_context_t *ctx, const char *value)
{
	wget_metalink_t *ml = ctx->metalink;

	sscanf(value, "%127s", ctx->hash);

	if (!ctx->length || !*ctx->hash_type || !*ctx->hash) {
		// incomplete piece description, nothing to store
		*ctx->hash = 0;
		return;
	}

	wget_metalink_piece_t entry;
	const wget_metalink_piece_t *last;

	// the piece list is created lazily on the first piece
	if (!ml->pieces)
		ml->pieces = wget_vector_create(32, 32, NULL);

	entry.length = ctx->length;
	strlcpy(entry.hash.type, ctx->hash_type, sizeof(entry.hash.type));
	strlcpy(entry.hash.hash_hex, ctx->hash, sizeof(entry.hash.hash_hex));

	// the new piece starts where the last stored piece ends
	last = wget_vector_get(ml->pieces, wget_vector_size(ml->pieces) - 1);
	entry.position = last ? last->position + last->length : 0;

	wget_vector_add(ml->pieces, &entry, sizeof(entry));

	*ctx->hash = 0;
}
Esempio n. 2
0
/*
 * Build the list of parts to download for a metalink job.
 *
 * Does nothing if the job has no metalink. Otherwise job->parts is created
 * (or emptied if it already exists) and receives one PART per metalink piece.
 * Part ids are 1-based, positions start at 0 and advance by the length of
 * each part. A part is as long as its piece, unless fewer bytes of the file
 * remain, in which case the remaining byte count is used.
 */
void job_create_parts(JOB *job)
{
	PART part;
	wget_metalink_t *metalink;
	// Bytes of the file not yet assigned to a part. Kept as off_t (the file
	// size type) so that sizes beyond the range of ssize_t are not truncated.
	off_t fsize;

	if (!(metalink = job->metalink))
		return;

	// position starts at 0; fields not set below stay zeroed
	memset(&part, 0, sizeof(PART));

	// create space to hold enough parts
	if (!job->parts)
		job->parts = wget_vector_create(wget_vector_size(metalink->pieces), 4, NULL);
	else
		wget_vector_clear(job->parts);

	fsize = metalink->size;

	for (int it = 0; it < wget_vector_size(metalink->pieces); it++) {
		wget_metalink_piece_t *piece = wget_vector_get(metalink->pieces, it);

		// the last part may be shorter than the piece length
		if (fsize >= piece->length) {
			part.length = piece->length;
		} else {
			part.length = fsize;
		}

		part.id = it + 1;

		// the vector receives a copy, so 'part' can be reused for the next piece
		wget_vector_add(job->parts, &part, sizeof(PART));

		part.position += part.length;
		fsize -= piece->length;
	}
}
Esempio n. 3
0
// CSS parser callback, invoked for each URI found.
// Stores a copy of the 'len' bytes at 'url', together with 'len' and 'pos',
// in the URI vector of the _CSS_CONTEXT passed as 'context'. The vector is
// created on first use, with _free_url installed as element destructor.
static void _css_get_url(void *context, const char *url, size_t len, size_t pos)
{
	_CSS_CONTEXT *css = context;
	WGET_PARSED_URL entry = {
		.url = wget_strmemdup(url, len),
		.abs_url = NULL, // filled in later, if at all
		.len = len,
		.pos = pos,
	};

	if (css->uris == NULL) {
		css->uris = wget_vector_create(16, -2, NULL);
		wget_vector_set_destructor(css->uris, (wget_vector_destructor_t)_free_url);
	}

	wget_vector_add(css->uris, &entry, sizeof(entry));
}

/*
 * Resolve each parsed URL in 'urls' against 'base'.
 *
 * For every entry that can be resolved, a copy of the absolute URL is stored
 * in its 'abs_url' field. Entries that cannot be resolved are reported via
 * error_printf() and left unchanged. Nothing happens if 'urls' or 'base' is
 * NULL.
 */
static void _urls_to_absolute(wget_vector_t *urls, wget_iri_t *base)
{
	wget_buffer_t scratch;

	if (!base || !urls)
		return;

	// one scratch buffer is reused for all entries
	wget_buffer_init(&scratch, NULL, 1024);

	for (int idx = 0; idx < wget_vector_size(urls); idx++) {
		WGET_PARSED_URL *entry = wget_vector_get(urls, idx);

		if (!wget_iri_relative_to_abs(base, entry->url, entry->len, &scratch)) {
			error_printf("Cannot resolve relative URI '%s'\n", entry->url);
			continue;
		}

		entry->abs_url = wget_strmemdup(scratch.data, scratch.length);
	}

	wget_buffer_deinit(&scratch);
}

/*
 * Parse 'len' bytes of CSS at 'css' and return the vector of URIs collected
 * by the parser callback.
 *
 * If 'encoding' is non-NULL, the encoding callback is registered with the
 * parser as well. The collected URIs are then resolved against 'base'.
 * The returned vector is whatever the context's 'uris' field holds after
 * parsing; that field starts out NULL.
 */
wget_vector_t *wget_css_get_urls(const char *css, size_t len, wget_iri_t *base, const char **encoding)
{
	_CSS_CONTEXT ctx = { .encoding = encoding };

	if (encoding)
		wget_css_parse_buffer(css, len, _css_get_url, _css_get_encoding, &ctx);
	else
		wget_css_parse_buffer(css, len, _css_get_url, NULL, &ctx);

	wget_vector_t *found = ctx.uris;

	_urls_to_absolute(found, base);

	return found;
}
Esempio n. 4
0
/*
 * Compare an existing local file with the job's metalink description and
 * fill job->parts with the parts that have to be downloaded.
 *
 * Returns 1 when there is nothing to queue:
 *   - the metalink has neither pieces nor a hash,
 *   - the metalink has no hashes and the file already has the expected size,
 *   - the whole-file checksum matches,
 *   - no checksum could be built (the file is then assumed to be OK).
 * Returns 0 when the job has no metalink, or after job->parts has been
 * filled: with the pieces whose hash check failed if the file could be
 * opened, else with one part per piece.
 */
int job_validate_file(JOB *job)
{
	PART part;
	wget_metalink_t *metalink;
	off_t fsize;
	int fd, rc = -1; // rc stays -1 if no hash check gets executed
	struct stat st;

	if (!(metalink = job->metalink))
		return 0;

	// part.position starts at 0 and is advanced piece by piece below
	memset(&part, 0, sizeof(PART));

	// Metalink may be used without pieces...
	// ...in that case a single piece covering the whole file is synthesized
	// from the first hash of the metalink.
	if (!metalink->pieces) {
		wget_metalink_piece_t piece;
		wget_metalink_hash_t *hash = wget_vector_get(metalink->hashes, 0);

		if (!hash)
			return 1;

		piece.length = metalink->size;
		piece.position = 0;
		strlcpy(piece.hash.type, hash->type, sizeof(piece.hash.type));
		strlcpy(piece.hash.hash_hex, hash->hash_hex, sizeof(piece.hash.hash_hex));

		metalink->pieces = wget_vector_create(1, 1, NULL);
		wget_vector_add(metalink->pieces, &piece, sizeof(wget_metalink_piece_t));
	}

	// create space to hold enough parts
	if (!job->parts)
		job->parts = wget_vector_create(wget_vector_size(metalink->pieces), 4, NULL);
	else
		wget_vector_clear(job->parts);

	// fsize counts the bytes of the file not yet assigned to a part
	fsize = metalink->size;

	if (wget_vector_size(metalink->hashes) == 0) {
		// multipart non-metalink download: do not clobber if file has expected size
		if (stat(metalink->name, &st) == 0 && st.st_size == fsize) {
			return 1; // we are done
		}
	}

	// truncate file if needed
	// (a failing truncate() is only reported, processing continues)
	if (stat(metalink->name, &st) == 0 && st.st_size > fsize) {
		if (truncate(metalink->name, fsize) == -1)
			error_printf(_("Failed to truncate %s\n from %llu to %llu bytes\n"),
				metalink->name, (unsigned long long)st.st_size, (unsigned long long)fsize);
	}

	if ((fd = open(metalink->name, O_RDONLY)) != -1) {
		// file exists, check which piece is invalid and requeue it

		// First check the file as a whole, using the first hash for which
		// _check_file_fd() does not return -1.
		// NOTE(review): errno is not reset before this loop, so a stale EINTR
		// from an earlier call would skip it entirely - confirm this is intended.
		for (int it = 0; errno != EINTR && it < wget_vector_size(metalink->hashes); it++) {
			wget_metalink_hash_t *hash = wget_vector_get(metalink->hashes, it);

			if ((rc = _check_file_fd(hash, fd)) == -1)
				continue; // hash type not available, try next

			break;
		}

		if (rc == 1) {
			info_printf(_("Checksum OK for '%s'\n"), metalink->name);
			close(fd);
			return 1; // we are done
		}
		else if (rc == -1) {
			// failed to check file, continue as if file is ok
			info_printf(_("Failed to build checksum, assuming file to be OK\n"));
			close(fd);
			return 1; // we are done
		} else
			info_printf(_("Bad checksum for '%s'\n"), metalink->name);

		// The whole-file checksum is bad: check piece by piece and queue
		// only the pieces whose hash check does not return 1.
		for (int it = 0; errno != EINTR && it < wget_vector_size(metalink->pieces); it++) {
			wget_metalink_piece_t *piece = wget_vector_get(metalink->pieces, it);
			wget_metalink_hash_t *hash = &piece->hash;

			// the last part may be shorter than the piece length
			if (fsize >= piece->length) {
				part.length = piece->length;
			} else {
				part.length = (size_t)fsize;
			}

			part.id = it + 1; // part ids are 1-based

			if ((rc = check_piece_hash(hash, fd, part.position, part.length)) != 1) {
				info_printf(_("Piece %d/%d not OK - requeuing\n"), it + 1, wget_vector_size(metalink->pieces));
				wget_vector_add(job->parts, &part, sizeof(PART));
				debug_printf("  need to download %llu bytes from pos=%llu\n",
					(unsigned long long)part.length, (unsigned long long)part.position);
			}

			part.position += part.length;
			fsize -= piece->length;
		}
		close(fd);
	} else {
		// the file could not be opened: queue one part per piece
		for (int it = 0; it < wget_vector_size(metalink->pieces); it++) {
			wget_metalink_piece_t *piece = wget_vector_get(metalink->pieces, it);

			// the last part may be shorter than the piece length
			if (fsize >= piece->length) {
				part.length = piece->length;
			} else {
				part.length = fsize;
			}

			part.id = it + 1; // part ids are 1-based

			wget_vector_add(job->parts, &part, sizeof(PART));

			part.position += part.length;
			fsize -= piece->length;
		}
	}

	return 0;
}
Esempio n. 5
0
/*
 * Read the HTML file 'fname' and print every URL found in it to stdout.
 *
 * A leading byte order mark (UTF-16BE, UTF-16LE or UTF-8) selects the
 * encoding and is skipped. UTF-16 content is converted to UTF-8 before it is
 * parsed; if the conversion fails, a message is printed and the file is
 * skipped. Nothing is printed if the file cannot be read.
 */
static void html_parse_localfile(const char *fname)
{
	char *buf, *data, *utf8 = NULL;
	const char *encoding = NULL;
	size_t len;

	if (!(buf = wget_read_file(fname, &len)))
		return;

	// 'buf' keeps the address returned by wget_read_file(), which is the one
	// that has to be freed. 'data' is the read position and may be moved
	// behind a BOM or switched to the converted text.
	data = buf;

	if (len >= 2 && (unsigned char)data[0] == 0xFE && (unsigned char)data[1] == 0xFF) {
		// Big-endian UTF-16
		encoding = "UTF-16BE";

		// adjust behind BOM
		data += 2;
		len -= 2;
	} else if (len >= 2 && (unsigned char)data[0] == 0xFF && (unsigned char)data[1] == 0xFE) {
		// Little-endian UTF-16
		encoding = "UTF-16LE";

		// adjust behind BOM
		data += 2;
		len -= 2;
	} else if (len >= 3 && (unsigned char)data[0] == 0xEF && (unsigned char)data[1] == 0xBB && (unsigned char)data[2] == 0xBF) {
		// UTF-8
		encoding = "UTF-8";

		// adjust behind BOM
		data += 3;
		len -= 3;
	}

	if (encoding)
		printf("URI encoding '%s' set by BOM\n", encoding);

	if (encoding && !wget_strncasecmp_ascii(encoding, "UTF-16", 6)) {
		size_t n;

		len -= len & 1; // ignore single trailing byte, else charset conversion fails

		if (wget_memiconv(encoding, data, len, "UTF-8", &utf8, &n) != 0) {
			printf("Failed to convert non-ASCII encoding '%s' to UTF-8, skip parsing\n", encoding);
			wget_xfree(buf);
			return;
		}

		printf("Convert non-ASCII encoding '%s' to UTF-8\n", encoding);
		data = utf8;
	}

	WGET_HTML_PARSED_RESULT *res = wget_html_get_urls_inline(data, NULL, NULL);

	// the BOM wins over an encoding declared inside the document; report the
	// declared encoding that is being ignored
	if (encoding && res->encoding && wget_strcasecmp_ascii(encoding, res->encoding))
		printf("Encoding '%s' as stated in document has been ignored\n", res->encoding);

	for (int it = 0; it < wget_vector_size(res->uris); it++) {
		WGET_HTML_PARSED_URL *html_url = wget_vector_get(res->uris, it);
		wget_string_t *url = &html_url->url;

		// url is a pointer/length pair, hence the "%.*s"
		printf("  %s.%s '%.*s'\n", html_url->dir, html_url->attr, (int) url->len, url->p);
	}

	// Release the parse result before the text buffers
	// (presumably the "inline" URL strings point into the parsed text).
	wget_html_free_urls_inline(&res);
	wget_xfree(utf8);
	wget_xfree(buf);
}
Esempio n. 6
0
/*
 * Test wget_http_parse_challenges() against a table of header values.
 *
 * Each table entry lists the authentication schemes expected in order,
 * terminated by NULL. For every entry the parsed challenges are compared
 * with that list: each matching scheme increments 'ok'; a wrong scheme, a
 * missing challenge or a surplus challenge increments 'failed' and is
 * reported via info_printf().
 */
static void test_parse_challenge(void)
{
	static const struct test_data {
		const char *
			input;
		const char *
			scheme[3];
	} test_data[] = {
		{	// simplebasic
			"Basic realm=\"foo\"",
			{ "Basic", NULL }
		},
		{	// simplebasicucase
			"BASIC REALM=\"foo\"",
			{ "Basic", NULL }
		},
		{	// simplebasicucase
			"Basic , realm=\"foo\"",
			{ "Basic", NULL }
		},
		{	//
			"Basic realm=\"test realm\"",
			{ "Basic", NULL }
		},
		{	//
			"Basic realm=\"test-äöÜ\"",
			{ "Basic", NULL }
		},
		{	//
			"Basic realm=\"basic\", Newauth realm=\"newauth\"",
			{ "Basic", "Newauth", NULL }
		},
	};

	wget_vector_t *challenges;
	wget_http_challenge_t *challenge;

	// Testcases found here http://greenbytes.de/tech/tc/httpauth/
	challenges = wget_vector_create(2, 2, NULL);
	wget_vector_set_destructor(challenges, (void(*)(void *))wget_http_free_challenge);

	for (unsigned it = 0; it < countof(test_data); it++) {
		const struct test_data *t = &test_data[it];

		wget_http_parse_challenges(t->input, challenges);

		// The loop must also visit the NULL terminator of the expected list,
		// otherwise the check for surplus challenges below is never reached.
		for (unsigned nchal = 0; nchal < countof(test_data[0].scheme); nchal++) {
			challenge = wget_vector_get(challenges, nchal);

			if (!t->scheme[nchal]) {
				// end of the expected list: no further challenge may exist
				if (challenge) {
					failed++;
					info_printf("Failed [%u]: wget_http_parse_challenges(%s) found %d challenges (expected %u)\n", it, t->input, wget_vector_size(challenges), nchal);
				}
				break;
			}

			if (!challenge) {
				failed++;
				info_printf("Failed [%u]: wget_http_parse_challenges(%s) did not find enough challenges\n", it, t->input);
				break;
			}

			// scheme names are compared case-insensitively
			if (!wget_strcasecmp_ascii(challenge->auth_scheme, t->scheme[nchal])) {
				ok++;
			} else {
				failed++;
				info_printf("Failed [%u]: wget_http_parse_challenges(%s) -> '%s' (expected '%s')\n", it, t->input, challenge->auth_scheme, t->scheme[nchal]);
			}
		}

		// reuse the same vector for the next test case
		wget_vector_clear(challenges);
	}

	wget_http_free_challenges(&challenges);
}