static void _atom_get_url(void *context, int flags, const char *dir, const char *attr, const char *val, size_t len, size_t pos G_GNUC_WGET_UNUSED) { struct atom_context *ctx = context; wget_string_t url; if (!val || !len) return; url.p = NULL; if ((flags & XML_FLG_ATTRIBUTE)) { if (!wget_strcasecmp_ascii(attr, "href") || !wget_strcasecmp_ascii(attr, "uri") || !wget_strcasecmp_ascii(attr, "src") || !wget_strcasecmp_ascii(attr, "scheme") || !wget_strcasecmp_ascii(attr, "xmlns") || !wget_strncasecmp_ascii(attr, "xmlns:", 6)) { for (;len && c_isspace(*val); val++, len--); // skip leading spaces for (;len && c_isspace(val[len - 1]); len--); // skip trailing spaces url.p = val; url.len = len; if (!ctx->urls) ctx->urls = wget_vector_create(32, -2, NULL); wget_vector_add(ctx->urls, &url, sizeof(url)); } } else if ((flags & XML_FLG_CONTENT)) { const char *elem = strrchr(dir, '/'); if (elem) { elem++; if (!wget_strcasecmp_ascii(elem, "icon") || !wget_strcasecmp_ascii(elem, "id") || !wget_strcasecmp_ascii(elem, "logo")) { for (;len && c_isspace(*val); val++, len--); // skip leading spaces for (;len && c_isspace(val[len - 1]); len--); // skip trailing spaces // debug_printf("#2 %02X %s %s '%.*s' %zd\n", flags, dir, attr, (int) len, val, len); url.p = val; url.len = len; if (!ctx->urls) ctx->urls = wget_vector_create(32, -2, NULL); wget_vector_add(ctx->urls, &url, sizeof(url)); } } } }
/*
 * Record the hash of one piece of the file.
 *
 * value is scanned for a single whitespace-delimited token of at most 127
 * characters, which becomes ctx->hash (assumes ctx->hash holds at least
 * 128 bytes - TODO confirm against the context definition).
 * A piece is appended to ctx->metalink->pieces only when ctx->length,
 * ctx->hash_type and ctx->hash are all set. Its position is the end of the
 * previously stored piece, or 0 when there is none.
 * ctx->hash is always cleared before returning.
 */
static void _add_piece(_metalink_context_t *ctx, const char *value)
{
	wget_metalink_t *metalink = ctx->metalink;

	// A failed scan (empty or all-whitespace value) leaves ctx->hash
	// untouched; make sure no stale content is mistaken for this piece's hash.
	if (sscanf(value, "%127s", ctx->hash) != 1)
		*ctx->hash = 0;

	if (ctx->length && *ctx->hash_type && *ctx->hash) {
		// hash for a piece of the file
		wget_metalink_piece_t piece, *piecep;

		// The whole record is copied into the vector below, so zero it first
		// (as _add_file_hash and _add_mirror do) instead of copying
		// indeterminate bytes behind the string terminators.
		memset(&piece, 0, sizeof(piece));

		if (!metalink->pieces)
			metalink->pieces = wget_vector_create(32, 32, NULL);

		piece.length = ctx->length;
		strlcpy(piece.hash.type, ctx->hash_type, sizeof(piece.hash.type));
		strlcpy(piece.hash.hash_hex, ctx->hash, sizeof(piece.hash.hash_hex));

		// Look at the last stored piece; for an empty vector the index is -1
		// (presumably wget_vector_get() returns NULL then - confirm).
		piecep = wget_vector_get(metalink->pieces, wget_vector_size(metalink->pieces) - 1);
		if (piecep)
			piece.position = piecep->position + piecep->length;
		else
			piece.position = 0;

		wget_vector_add(metalink->pieces, &piece, sizeof(wget_metalink_piece_t));
	}

	*ctx->hash = 0;
}
/*
 * Add a download mirror to ctx->metalink->mirrors.
 *
 * value is the mirror URL. Only URLs starting with "http:" or "https:"
 * (ASCII case-insensitive) are considered, and only if wget_iri_parse()
 * accepts them. The mirror takes its location and priority from ctx.
 *
 * ctx->location and ctx->priority are reset on every call, including when
 * the URL is rejected. Previously the reset only happened after a successful
 * add, so the attributes of a skipped mirror were carried over to the next one.
 */
static void _add_mirror(_metalink_context_t *ctx, const char *value)
{
	if (!wget_strncasecmp_ascii(value, "http:", 5) || !wget_strncasecmp_ascii(value, "https:", 6)) {
		wget_metalink_t *metalink = ctx->metalink;
		wget_metalink_mirror_t mirror;

		memset(&mirror, 0, sizeof(wget_metalink_mirror_t));
		strlcpy(mirror.location, ctx->location, sizeof(mirror.location));
		mirror.priority = ctx->priority;
		mirror.iri = wget_iri_parse(value, NULL);

		if (mirror.iri) {
			if (!metalink->mirrors) {
				metalink->mirrors = wget_vector_create(4, 4, NULL);
				wget_vector_set_destructor(metalink->mirrors, (void(*)(void *))_free_mirror);
			}
			wget_vector_add(metalink->mirrors, &mirror, sizeof(wget_metalink_mirror_t));
		}
	}

	// the per-mirror attributes have been consumed (or discarded)
	*ctx->location = 0;
	ctx->priority = 999999;
}
// Callback function, called from CSS parser for each URI found. static void _css_get_url(void *context, const char *url, size_t len, size_t pos) { _CSS_CONTEXT *ctx = context; WGET_PARSED_URL parsed_url = { .len = len, .pos = pos, .url = wget_strmemdup(url, len), .abs_url = NULL }; if (!ctx->uris) { ctx->uris = wget_vector_create(16, -2, NULL); wget_vector_set_destructor(ctx->uris, (wget_vector_destructor_t)_free_url); } wget_vector_add(ctx->uris, &parsed_url, sizeof(parsed_url)); } static void _urls_to_absolute(wget_vector_t *urls, wget_iri_t *base) { if (base && urls) { wget_buffer_t buf; wget_buffer_init(&buf, NULL, 1024); for (int it = 0; it < wget_vector_size(urls); it++) { WGET_PARSED_URL *url = wget_vector_get(urls, it); if (wget_iri_relative_to_abs(base, url->url, url->len, &buf)) url->abs_url = wget_strmemdup(buf.data, buf.length); else error_printf("Cannot resolve relative URI '%s'\n", url->url); } wget_buffer_deinit(&buf); } } wget_vector_t *wget_css_get_urls(const char *css, size_t len, wget_iri_t *base, const char **encoding) { _CSS_CONTEXT context = { .encoding = encoding }; wget_css_parse_buffer(css, len, _css_get_url, encoding ? _css_get_encoding : NULL, &context); _urls_to_absolute(context.uris, base); return context.uris; }
/*
 * Record a hash that covers the complete file.
 *
 * value is scanned for a single whitespace-delimited token of at most 127
 * characters, which becomes ctx->hash (assumes ctx->hash holds at least
 * 128 bytes - TODO confirm against the context definition).
 * A hash entry is appended to ctx->metalink->hashes only when both
 * ctx->hash_type and ctx->hash are non-empty.
 * ctx->hash_type and ctx->hash are always cleared before returning.
 */
static void _add_file_hash(_metalink_context_t *ctx, const char *value)
{
	wget_metalink_t *metalink = ctx->metalink;

	// A failed scan (empty or all-whitespace value) leaves ctx->hash
	// untouched; make sure no stale content is mistaken for this hash.
	if (sscanf(value, "%127s", ctx->hash) != 1)
		*ctx->hash = 0;

	if (*ctx->hash_type && *ctx->hash) {
		// hashes for the complete file
		wget_metalink_hash_t hash;

		memset(&hash, 0, sizeof(wget_metalink_hash_t));
		strlcpy(hash.type, ctx->hash_type, sizeof(hash.type));
		strlcpy(hash.hash_hex, ctx->hash, sizeof(hash.hash_hex));

		if (!metalink->hashes)
			metalink->hashes = wget_vector_create(4, 4, NULL);

		wget_vector_add(metalink->hashes, &hash, sizeof(wget_metalink_hash_t));
	}

	*ctx->hash_type = *ctx->hash = 0;
}
/*
 * (Re)build job->parts with one PART per piece of job->metalink.
 *
 * Parts are numbered from 1 and laid out back to back starting at position 0.
 * A part is as long as its piece, except that it is limited to the number of
 * bytes of metalink->size not yet covered by earlier parts.
 * Does nothing when the job has no metalink.
 */
void job_create_parts(JOB *job)
{
	PART part;
	wget_metalink_t *metalink;
	// Same type as in job_validate_file(): ssize_t may be narrower than off_t
	// (32-bit builds with large file support) and would truncate big sizes.
	off_t fsize;

	if (!(metalink = job->metalink))
		return;

	memset(&part, 0, sizeof(PART));

	// create space to hold enough parts
	if (!job->parts)
		job->parts = wget_vector_create(wget_vector_size(metalink->pieces), 4, NULL);
	else
		wget_vector_clear(job->parts);

	// bytes of the file not yet assigned to a part
	fsize = metalink->size;

	for (int it = 0; it < wget_vector_size(metalink->pieces); it++) {
		wget_metalink_piece_t *piece = wget_vector_get(metalink->pieces, it);

		// the last piece may be shorter than the nominal piece length
		if (fsize >= piece->length) {
			part.length = piece->length;
		} else {
			part.length = fsize;
		}

		part.id = it + 1;

		wget_vector_add(job->parts, &part, sizeof(PART));

		// NOTE(review): if the piece lengths add up to more than
		// metalink->size, fsize goes negative here - confirm that the
		// metalink parser rules this out.
		part.position += part.length;
		fsize -= piece->length;
	}
}
/*
 * Check an already existing local file against the metalink data of job and
 * fill job->parts with the parts that (still) have to be downloaded.
 *
 * Returns 1 when nothing needs to be downloaded:
 *   - there are neither pieces nor a file hash,
 *   - there is no file hash and the file already has the expected size,
 *   - a file hash matched, or no file hash could be calculated.
 * Returns 0 otherwise, including when job has no metalink. In that case
 * job->parts holds the pieces whose hash check did not succeed or, if the
 * file could not be opened, all pieces.
 */
int job_validate_file(JOB *job)
{
	PART cur;
	struct stat sb;
	off_t remaining;
	int fd, rc = -1;
	wget_metalink_t *metalink = job->metalink;

	if (!metalink)
		return 0;

	memset(&cur, 0, sizeof(PART));

	// Metalink may be used without pieces: treat the whole file as one piece
	// that carries the first file hash.
	if (!metalink->pieces) {
		wget_metalink_piece_t whole;
		wget_metalink_hash_t *file_hash = wget_vector_get(metalink->hashes, 0);

		if (!file_hash)
			return 1;

		whole.length = metalink->size;
		whole.position = 0;
		strlcpy(whole.hash.type, file_hash->type, sizeof(whole.hash.type));
		strlcpy(whole.hash.hash_hex, file_hash->hash_hex, sizeof(whole.hash.hash_hex));

		metalink->pieces = wget_vector_create(1, 1, NULL);
		wget_vector_add(metalink->pieces, &whole, sizeof(wget_metalink_piece_t));
	}

	// create space to hold enough parts
	if (job->parts)
		wget_vector_clear(job->parts);
	else
		job->parts = wget_vector_create(wget_vector_size(metalink->pieces), 4, NULL);

	remaining = metalink->size;

	// multipart non-metalink download: do not clobber if file has expected size
	if (wget_vector_size(metalink->hashes) == 0
		&& stat(metalink->name, &sb) == 0 && sb.st_size == remaining)
	{
		return 1; // we are done
	}

	// truncate file if needed
	if (stat(metalink->name, &sb) == 0 && sb.st_size > remaining) {
		if (truncate(metalink->name, remaining) == -1) {
			error_printf(_("Failed to truncate %s\n from %llu to %llu bytes\n"),
				metalink->name, (unsigned long long)sb.st_size, (unsigned long long)remaining);
		}
	}

	fd = open(metalink->name, O_RDONLY);

	if (fd == -1) {
		// file could not be opened: queue every piece
		for (int idx = 0; idx < wget_vector_size(metalink->pieces); idx++) {
			wget_metalink_piece_t *piece = wget_vector_get(metalink->pieces, idx);

			if (remaining < piece->length) {
				cur.length = remaining;
			} else {
				cur.length = piece->length;
			}

			cur.id = idx + 1;

			wget_vector_add(job->parts, &cur, sizeof(PART));

			cur.position += cur.length;
			remaining -= piece->length;
		}

		return 0;
	}

	// File exists. Try the file hashes in order until one can be evaluated
	// (-1 means: hash type not available, try next).
	for (int idx = 0; errno != EINTR && idx < wget_vector_size(metalink->hashes); idx++) {
		wget_metalink_hash_t *file_hash = wget_vector_get(metalink->hashes, idx);

		rc = _check_file_fd(file_hash, fd);
		if (rc != -1)
			break;
	}

	if (rc == 1 || rc == -1) {
		if (rc == 1) {
			info_printf(_("Checksum OK for '%s'\n"), metalink->name);
		} else {
			// failed to check file, continue as if file is ok
			info_printf(_("Failed to build checksum, assuming file to be OK\n"));
		}

		close(fd);
		return 1; // we are done
	}

	info_printf(_("Bad checksum for '%s'\n"), metalink->name);

	// check which piece is invalid and requeue it
	for (int idx = 0; errno != EINTR && idx < wget_vector_size(metalink->pieces); idx++) {
		wget_metalink_piece_t *piece = wget_vector_get(metalink->pieces, idx);
		wget_metalink_hash_t *piece_hash = &piece->hash;

		if (remaining < piece->length) {
			cur.length = (size_t)remaining;
		} else {
			cur.length = piece->length;
		}

		cur.id = idx + 1;

		rc = check_piece_hash(piece_hash, fd, cur.position, cur.length);
		if (rc != 1) {
			info_printf(_("Piece %d/%d not OK - requeuing\n"), idx + 1, wget_vector_size(metalink->pieces));
			wget_vector_add(job->parts, &cur, sizeof(PART));
			debug_printf(" need to download %llu bytes from pos=%llu\n",
				(unsigned long long)cur.length, (unsigned long long)cur.position);
		}

		cur.position += cur.length;
		remaining -= piece->length;
	}

	close(fd);

	return 0;
}
/*
 * Add the NUL-terminated string s to the vector v.
 *
 * The string and its size (including the terminating NUL byte) are handed to
 * wget_vector_add(), whose result is returned unchanged.
 *
 * A NULL s is rejected with -1 instead of being passed to strlen(), which
 * would be undefined behaviour.
 * NOTE(review): -1 assumes that wget_vector_add() reports failure with a
 * negative value - confirm against its definition.
 */
int wget_vector_add_str(wget_vector_t *v, const char *s)
{
	if (!s)
		return -1;

	return wget_vector_add(v, s, strlen(s) + 1);
}
static void test_stringmap(void) { wget_stringmap_t *m; char key[128], value[128], *val; int run, it; size_t valuesize; // the initial size of 16 forces the internal reshashing function to be called twice m = wget_stringmap_create(16); for (run = 0; run < 2; run++) { if (run) { wget_stringmap_clear(m); wget_stringmap_sethashfunc(m, hash_txt); } for (it = 0; it < 26; it++) { sprintf(key, "http://www.example.com/subdir/%d.html", it); valuesize = sprintf(value, "%d.html", it); if (wget_stringmap_put(m, key, value, valuesize + 1)) { failed++; info_printf("stringmap_put(%s) returns unexpected old value\n", key); } else ok++; } if ((it = wget_stringmap_size(m)) != 26) { failed++; info_printf("stringmap_size() returned %d (expected %d)\n", it, 26); } else ok++; // now, look up every single entry for (it = 0; it < 26; it++) { sprintf(key, "http://www.example.com/subdir/%d.html", it); sprintf(value, "%d.html", it); if (!(val = wget_stringmap_get(m, key))) { failed++; info_printf("stringmap_get(%s) didn't find entry\n", key); } else if (strcmp(val, value)) { failed++; info_printf("stringmap_get(%s) found '%s' (expected '%s')\n", key, val, value); } else ok++; } wget_stringmap_clear(m); if ((it = wget_stringmap_size(m)) != 0) { failed++; info_printf("stringmap_size() returned %d (expected 0)\n", it); } else ok++; for (it = 0; it < 26; it++) { sprintf(key, "http://www.example.com/subdir/%d.html", it); valuesize = sprintf(value, "%d.html", it); if (wget_stringmap_put(m, key, value, valuesize + 1)) { failed++; info_printf("stringmap_put(%s) returns unexpected old value\n", key); } else ok++; } if ((it = wget_stringmap_size(m)) != 26) { failed++; info_printf("stringmap_size() returned %d (expected %d)\n", it, 26); } else ok++; // now, remove every single entry for (it = 0; it < 26; it++) { sprintf(key, "http://www.example.com/subdir/%d.html", it); sprintf(value, "%d.html", it); wget_stringmap_remove(m, key); } if ((it = wget_stringmap_size(m)) != 0) { failed++; 
info_printf("stringmap_size() returned %d (expected 0)\n", it); } else ok++; for (it = 0; it < 26; it++) { sprintf(key, "http://www.example.com/subdir/%d.html", it); valuesize = sprintf(value, "%d.html", it); if (wget_stringmap_put(m, key, value, valuesize + 1)) { failed++; info_printf("stringmap_put(%s) returns unexpected old value\n", key); } else ok++; } if ((it = wget_stringmap_size(m)) != 26) { failed++; info_printf("stringmap_size() returned %d (expected %d)\n", it, 26); } else ok++; } // testing alloc/free in stringmap/hashmap wget_stringmap_clear(m); wget_stringmap_put(m, "thekey", NULL, 0) ? failed++ : ok++; wget_stringmap_put(m, "thekey", NULL, 0) ? ok++ : failed++; wget_stringmap_put(m, "thekey", "thevalue", 9) ? ok++ : failed++; wget_stringmap_put(m, "thekey", "thevalue", 9) ? ok++ : failed++; wget_stringmap_put(m, "thekey", NULL, 0) ? ok++ : failed++; // testing key/value identity alloc/free in stringmap/hashmap wget_stringmap_clear(m); wget_stringmap_put(m, "thekey", NULL, 0) ? failed++ : ok++; wget_stringmap_put(m, "thekey", NULL, 0) ? ok++ : failed++; wget_stringmap_put(m, "thekey", "thevalue", 9) ? ok++ : failed++; wget_stringmap_put(m, "thekey", NULL, 0) ? 
ok++ : failed++; wget_stringmap_free(&m); wget_http_challenge_t challenge; wget_http_parse_challenge("Basic realm=\"test realm\"", &challenge); wget_http_free_challenge(&challenge); wget_vector_t *challenges; challenges = wget_vector_create(2, 2, NULL); wget_vector_set_destructor(challenges, (void(*)(void *))wget_http_free_challenge); wget_http_parse_challenge("Basic realm=\"test realm\"", &challenge); wget_vector_add(challenges, &challenge, sizeof(challenge)); wget_http_free_challenges(&challenges); char *response_text = strdup( "HTTP/1.1 401 Authorization Required\r\n"\ "Date: Sun, 23 Dec 2012 21:03:45 GMT\r\n"\ "Server: Apache/2.2.22 (Debian)\r\n"\ "WWW-Authenticate: Digest realm=\"therealm\", nonce=\"Ip6MaovRBAA=c4af733c51270698260f5d357724c2cbce20fa3d\", algorithm=MD5, domain=\"/prot_digest_md5\", qop=\"auth\"\r\n"\ "Vary: Accept-Encoding\r\n"\ "Content-Length: 476\r\n"\ "Keep-Alive: timeout=5, max=99\r\n"\ "Connection: Keep-Alive\r\n"\ "Content-Type: text/html; charset=iso-8859-1\r\n\r\n"); wget_iri_t *iri = wget_iri_parse("http://localhost/prot_digest_md5/", NULL); wget_http_request_t *req = wget_http_create_request(iri, "GET"); wget_http_response_t *resp = wget_http_parse_response_header(response_text); wget_http_add_credentials(req, wget_vector_get(resp->challenges, 0), "tim", "123"); // for (it=0;it<vec_size(req->lines);it++) { // info_printf("%s\n", (char *)vec_get(req->lines, it)); // } wget_http_free_response(&resp); wget_http_free_request(&req); wget_iri_free(&iri); xfree(response_text); // Authorization: Digest username="******", realm="therealm", nonce="Ip6MaovRBAA=c4af733c51270698260f5d357724c2cbce20fa3d", uri="/prot_digest_md5/", response="a99e2012d507a73dd46eb044d3f4641c", qop=auth, nc=00000001, cnonce="3d20faa1" }