static void _add_piece(_metalink_context_t *ctx, const char *value)
{
	wget_metalink_t *metalink = ctx->metalink;

	sscanf(value, "%127s", ctx->hash);

	if (ctx->length && *ctx->hash_type && *ctx->hash) {
		// hash for a piece of the file
		wget_metalink_piece_t piece, *piecep;

		if (!metalink->pieces)
			metalink->pieces = wget_vector_create(32, 32, NULL);

		piece.length = ctx->length;
		strlcpy(piece.hash.type, ctx->hash_type, sizeof(piece.hash.type));
		strlcpy(piece.hash.hash_hex, ctx->hash, sizeof(piece.hash.hash_hex));

		piecep = wget_vector_get(metalink->pieces, wget_vector_size(metalink->pieces) - 1);
		if (piecep)
			piece.position = piecep->position + piecep->length;
		else
			piece.position = 0;

		wget_vector_add(metalink->pieces, &piece, sizeof(wget_metalink_piece_t));
	}

	*ctx->hash = 0;
}
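/*
 * A minimal standalone sketch (not wget2 code) of the offset chaining that
 * _add_piece() performs: each new piece starts where the previous one ends.
 * piece_t is a hypothetical stand-in for wget_metalink_piece_t, and the
 * piece length is an invented value. Compiles on its own.
 */
#include <stdio.h>

typedef struct {
	long long position; // absolute byte offset of the piece within the file
	long long length;   // piece size in bytes
} piece_t;

int main(void)
{
	piece_t pieces[4];
	const long long piece_len = 1024; // assumed nominal piece length

	for (int i = 0; i < 4; i++) {
		pieces[i].length = piece_len;
		// same derivation as _add_piece(): previous position + previous length
		pieces[i].position = i ? pieces[i - 1].position + pieces[i - 1].length : 0;
		printf("piece %d: offset %lld, length %lld\n", i, pieces[i].position, pieces[i].length);
	}
	return 0;
}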
void job_create_parts(JOB *job)
{
	PART part;
	wget_metalink_t *metalink;
	ssize_t fsize;

	if (!(metalink = job->metalink))
		return;

	memset(&part, 0, sizeof(PART));

	// create space to hold enough parts
	if (!job->parts)
		job->parts = wget_vector_create(wget_vector_size(metalink->pieces), 4, NULL);
	else
		wget_vector_clear(job->parts);

	fsize = metalink->size;

	for (int it = 0; it < wget_vector_size(metalink->pieces); it++) {
		wget_metalink_piece_t *piece = wget_vector_get(metalink->pieces, it);

		if (fsize >= piece->length) {
			part.length = piece->length;
		} else {
			part.length = fsize;
		}

		part.id = it + 1;

		wget_vector_add(job->parts, &part, sizeof(PART));

		part.position += part.length;
		fsize -= piece->length;
	}
}
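/*
 * Standalone sketch of the clamping in job_create_parts(), with invented
 * sizes: every part is one piece long except the last, which is clamped to
 * the remaining file size when the total is not a multiple of the piece
 * length. Compiles on its own.
 */
#include <stdio.h>

int main(void)
{
	long long fsize = 10000;          // hypothetical total file size
	const long long piece_len = 4096; // hypothetical piece length

	for (int id = 1; fsize > 0; id++) {
		long long part_len = fsize >= piece_len ? piece_len : fsize;
		printf("part %d: %lld bytes\n", id, part_len); // last part: 1808 bytes
		fsize -= piece_len;
	}
	return 0;
}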
// Callback function, called from CSS parser for each URI found.
static void _css_get_url(void *context, const char *url, size_t len, size_t pos)
{
	_CSS_CONTEXT *ctx = context;
	WGET_PARSED_URL parsed_url = {
		.len = len,
		.pos = pos,
		.url = wget_strmemdup(url, len),
		.abs_url = NULL
	};

	if (!ctx->uris) {
		ctx->uris = wget_vector_create(16, -2, NULL);
		wget_vector_set_destructor(ctx->uris, (wget_vector_destructor_t)_free_url);
	}

	wget_vector_add(ctx->uris, &parsed_url, sizeof(parsed_url));
}

static void _urls_to_absolute(wget_vector_t *urls, wget_iri_t *base)
{
	if (base && urls) {
		wget_buffer_t buf;

		wget_buffer_init(&buf, NULL, 1024);

		for (int it = 0; it < wget_vector_size(urls); it++) {
			WGET_PARSED_URL *url = wget_vector_get(urls, it);

			if (wget_iri_relative_to_abs(base, url->url, url->len, &buf))
				url->abs_url = wget_strmemdup(buf.data, buf.length);
			else
				error_printf("Cannot resolve relative URI '%s'\n", url->url);
		}

		wget_buffer_deinit(&buf);
	}
}

wget_vector_t *wget_css_get_urls(const char *css, size_t len, wget_iri_t *base, const char **encoding)
{
	_CSS_CONTEXT context = { .encoding = encoding };

	wget_css_parse_buffer(css, len, _css_get_url, encoding ? _css_get_encoding : NULL, &context);
	_urls_to_absolute(context.uris, base);

	return context.uris;
}
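/*
 * A hypothetical caller sketch for wget_css_get_urls(), assuming the libwget
 * API shown above (link with -lwget). The base IRI and CSS snippet are made
 * up; abs_url is only non-NULL where the relative URI could be resolved.
 */
#include <stdio.h>
#include <string.h>
#include <wget.h>

int main(void)
{
	const char *css = "body { background: url(img/bg.png); }";
	const char *encoding = NULL;
	wget_iri_t *base = wget_iri_parse("http://example.com/styles/", "utf-8");

	wget_vector_t *urls = wget_css_get_urls(css, strlen(css), base, &encoding);

	for (int it = 0; it < wget_vector_size(urls); it++) {
		WGET_PARSED_URL *url = wget_vector_get(urls, it);
		printf("%s -> %s\n", url->url, url->abs_url ? url->abs_url : "(unresolved)");
	}

	wget_vector_free(&urls); // element destructor was set in _css_get_url()
	wget_iri_free(&base);
	return 0;
}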
int job_validate_file(JOB *job)
{
	PART part;
	wget_metalink_t *metalink;
	off_t fsize;
	int fd, rc = -1;
	struct stat st;

	if (!(metalink = job->metalink))
		return 0;

	memset(&part, 0, sizeof(PART));

	// Metalink may be used without pieces...
	if (!metalink->pieces) {
		wget_metalink_piece_t piece;
		wget_metalink_hash_t *hash = wget_vector_get(metalink->hashes, 0);

		if (!hash)
			return 1;

		piece.length = metalink->size;
		piece.position = 0;
		strlcpy(piece.hash.type, hash->type, sizeof(piece.hash.type));
		strlcpy(piece.hash.hash_hex, hash->hash_hex, sizeof(piece.hash.hash_hex));

		metalink->pieces = wget_vector_create(1, 1, NULL);
		wget_vector_add(metalink->pieces, &piece, sizeof(wget_metalink_piece_t));
	}

	// create space to hold enough parts
	if (!job->parts)
		job->parts = wget_vector_create(wget_vector_size(metalink->pieces), 4, NULL);
	else
		wget_vector_clear(job->parts);

	fsize = metalink->size;

	if (wget_vector_size(metalink->hashes) == 0) {
		// multipart non-metalink download: do not clobber if file has expected size
		if (stat(metalink->name, &st) == 0 && st.st_size == fsize) {
			return 1; // we are done
		}
	}

	// truncate file if needed
	if (stat(metalink->name, &st) == 0 && st.st_size > fsize) {
		if (truncate(metalink->name, fsize) == -1)
			error_printf(_("Failed to truncate %s from %llu to %llu bytes\n"),
				metalink->name, (unsigned long long)st.st_size, (unsigned long long)fsize);
	}

	if ((fd = open(metalink->name, O_RDONLY)) != -1) {
		// file exists, check which piece is invalid and requeue it
		for (int it = 0; errno != EINTR && it < wget_vector_size(metalink->hashes); it++) {
			wget_metalink_hash_t *hash = wget_vector_get(metalink->hashes, it);

			if ((rc = _check_file_fd(hash, fd)) == -1)
				continue; // hash type not available, try next

			break;
		}

		if (rc == 1) {
			info_printf(_("Checksum OK for '%s'\n"), metalink->name);
			close(fd);
			return 1; // we are done
		} else if (rc == -1) {
			// failed to check file, continue as if file is ok
			info_printf(_("Failed to build checksum, assuming file to be OK\n"));
			close(fd);
			return 1; // we are done
		} else
			info_printf(_("Bad checksum for '%s'\n"), metalink->name);

//		if (vec_size(metalink->pieces) < 1)
//			return;

		for (int it = 0; errno != EINTR && it < wget_vector_size(metalink->pieces); it++) {
			wget_metalink_piece_t *piece = wget_vector_get(metalink->pieces, it);
			wget_metalink_hash_t *hash = &piece->hash;

			if (fsize >= piece->length) {
				part.length = piece->length;
			} else {
				part.length = (size_t)fsize;
			}

			part.id = it + 1;

			if ((rc = check_piece_hash(hash, fd, part.position, part.length)) != 1) {
				info_printf(_("Piece %d/%d not OK - requeuing\n"), it + 1, wget_vector_size(metalink->pieces));
				wget_vector_add(job->parts, &part, sizeof(PART));
				debug_printf("  need to download %llu bytes from pos=%llu\n",
					(unsigned long long)part.length, (unsigned long long)part.position);
			}

			part.position += part.length;
			fsize -= piece->length;
		}

		close(fd);
	} else {
		for (int it = 0; it < wget_vector_size(metalink->pieces); it++) {
			wget_metalink_piece_t *piece = wget_vector_get(metalink->pieces, it);

			if (fsize >= piece->length) {
				part.length = piece->length;
			} else {
				part.length = fsize;
			}

			part.id = it + 1;

			wget_vector_add(job->parts, &part, sizeof(PART));

			part.position += part.length;
			fsize -= piece->length;
		}
	}

	return 0;
}
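/*
 * Standalone sketch of the requeue decision in job_validate_file(): only
 * parts whose piece hash fails verification are re-added for download.
 * piece_hash_ok() is a hypothetical stub standing in for check_piece_hash();
 * the sizes are invented. Compiles on its own.
 */
#include <stdio.h>
#include <stdbool.h>

static bool piece_hash_ok(int id)
{
	return id != 2; // pretend only piece 2 is corrupt
}

int main(void)
{
	long long fsize = 10000, pos = 0;
	const long long piece_len = 4096;

	for (int id = 1; fsize > 0; id++) {
		long long len = fsize >= piece_len ? piece_len : fsize;
		if (!piece_hash_ok(id))
			printf("piece %d not OK - requeue %lld bytes at offset %lld\n", id, len, pos);
		pos += len;
		fsize -= piece_len;
	}
	return 0;
}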
static void html_parse_localfile(const char *fname)
{
	char *data, *p;
	const char *encoding = NULL;
	size_t len;

	if (!(data = wget_read_file(fname, &len)))
		return;

	p = data; // parse position; 'data' stays valid for wget_xfree()

	// BOM detection needs at least 2 (UTF-16) resp. 3 (UTF-8) bytes
	if (len >= 2 && (unsigned char)p[0] == 0xFE && (unsigned char)p[1] == 0xFF) {
		// Big-endian UTF-16
		encoding = "UTF-16BE";

		// adjust behind BOM
		p += 2;
		len -= 2;
	} else if (len >= 2 && (unsigned char)p[0] == 0xFF && (unsigned char)p[1] == 0xFE) {
		// Little-endian UTF-16
		encoding = "UTF-16LE";

		// adjust behind BOM
		p += 2;
		len -= 2;
	} else if (len >= 3 && (unsigned char)p[0] == 0xEF && (unsigned char)p[1] == 0xBB && (unsigned char)p[2] == 0xBF) {
		// UTF-8
		encoding = "UTF-8";

		// adjust behind BOM
		p += 3;
		len -= 3;
	}

	if (encoding)
		printf("URI encoding '%s' set by BOM\n", encoding);

	if (encoding && !wget_strncasecmp_ascii(encoding, "UTF-16", 6)) {
		size_t n;
		char *utf8;

		len -= len & 1; // ignore single trailing byte, else charset conversion fails

		if (wget_memiconv(encoding, p, len, "UTF-8", &utf8, &n) == 0) {
			printf("Convert non-ASCII encoding '%s' to UTF-8\n", encoding);
			wget_xfree(data); // free the original buffer, parse the converted copy
			p = data = utf8;
		} else {
			printf("Failed to convert non-ASCII encoding '%s' to UTF-8, skip parsing\n", encoding);
			wget_xfree(data);
			return;
		}
	}

	WGET_HTML_PARSED_RESULT *res = wget_html_get_urls_inline(p, NULL, NULL);

	if (encoding) {
		if (res->encoding && wget_strcasecmp_ascii(encoding, res->encoding))
			printf("Encoding '%s' as stated in document has been ignored\n", encoding);
	}

	for (int it = 0; it < wget_vector_size(res->uris); it++) {
		WGET_HTML_PARSED_URL *html_url = wget_vector_get(res->uris, it);
		wget_string_t *url = &html_url->url;

		printf("  %s.%s '%.*s'\n", html_url->dir, html_url->attr, (int) url->len, url->p);
	}

	wget_xfree(data);
	wget_html_free_urls_inline(&res);
}
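/*
 * The BOM handling above, factored into a standalone sketch: detect_bom() is
 * a hypothetical helper (not libwget API) that returns the charset name and
 * BOM length, mirroring the three checks in html_parse_localfile().
 * Compiles on its own.
 */
#include <stdio.h>
#include <stddef.h>

static const char *detect_bom(const unsigned char *p, size_t len, size_t *bom_len)
{
	if (len >= 2 && p[0] == 0xFE && p[1] == 0xFF) { *bom_len = 2; return "UTF-16BE"; }
	if (len >= 2 && p[0] == 0xFF && p[1] == 0xFE) { *bom_len = 2; return "UTF-16LE"; }
	if (len >= 3 && p[0] == 0xEF && p[1] == 0xBB && p[2] == 0xBF) { *bom_len = 3; return "UTF-8"; }
	*bom_len = 0;
	return NULL;
}

int main(void)
{
	const unsigned char doc[] = { 0xEF, 0xBB, 0xBF, '<', 'h', 't', 'm', 'l', '>' };
	size_t bom_len;
	const char *enc = detect_bom(doc, sizeof(doc), &bom_len);

	printf("encoding: %s, payload starts at byte %zu\n", enc ? enc : "(none)", bom_len);
	return 0;
}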
static void test_parse_challenge(void)
{
	static const struct test_data {
		const char *input;
		const char *scheme[3];
	} test_data[] = {
		{	// simplebasic
			"Basic realm=\"foo\"",
			{ "Basic", NULL }
		},
		{	// simplebasicucase
			"BASIC REALM=\"foo\"",
			{ "Basic", NULL }
		},
		{	// simplebasiccomma
			"Basic , realm=\"foo\"",
			{ "Basic", NULL }
		},
		{
			"Basic realm=\"test realm\"",
			{ "Basic", NULL }
		},
		{
			"Basic realm=\"test-äöÜ\"",
			{ "Basic", NULL }
		},
		{
			"Basic realm=\"basic\", Newauth realm=\"newauth\"",
			{ "Basic", "Newauth", NULL }
		},
	};

	wget_vector_t *challenges;
	wget_http_challenge_t *challenge;

	// Testcases found here http://greenbytes.de/tech/tc/httpauth/
	challenges = wget_vector_create(2, 2, NULL);
	wget_vector_set_destructor(challenges, (void(*)(void *))wget_http_free_challenge);

	for (unsigned it = 0; it < countof(test_data); it++) {
		const struct test_data *t = &test_data[it];

		wget_http_parse_challenges(t->input, challenges);

		// iterate over every scheme slot so surplus challenges are detected;
		// the former loop condition '&& t->scheme[nchal]' made the NULL check below unreachable
		for (unsigned nchal = 0; nchal < countof(test_data[0].scheme); nchal++) {
			challenge = wget_vector_get(challenges, nchal);

			if (!t->scheme[nchal]) {
				if (challenge) {
					failed++;
					info_printf("Failed [%u]: wget_http_parse_challenges(%s) found %d challenges (expected %u)\n", it, t->input, wget_vector_size(challenges), nchal);
				}
				break;
			}

			if (!challenge) {
				failed++;
				info_printf("Failed [%u]: wget_http_parse_challenges(%s) did not find enough challenges\n", it, t->input);
				break;
			}

			if (!wget_strcasecmp_ascii(challenge->auth_scheme, t->scheme[nchal])) {
				ok++;
			} else {
				failed++;
				info_printf("Failed [%u]: wget_http_parse_challenges(%s) -> '%s' (expected '%s')\n", it, t->input, challenge->auth_scheme, t->scheme[nchal]);
			}
		}

		wget_vector_clear(challenges);
	}

	wget_http_free_challenges(&challenges);
}
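/*
 * Minimal direct usage of the API under test, assuming libwget as above
 * (link with -lwget): parse a WWW-Authenticate value carrying two challenges.
 */
#include <stdio.h>
#include <wget.h>

int main(void)
{
	wget_vector_t *challenges = wget_vector_create(2, 2, NULL);
	wget_vector_set_destructor(challenges, (void(*)(void *))wget_http_free_challenge);

	wget_http_parse_challenges("Basic realm=\"basic\", Newauth realm=\"newauth\"", challenges);

	for (int it = 0; it < wget_vector_size(challenges); it++) {
		wget_http_challenge_t *challenge = wget_vector_get(challenges, it);
		printf("scheme: %s\n", challenge->auth_scheme); // "Basic", then "Newauth"
	}

	wget_http_free_challenges(&challenges);
	return 0;
}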