char *mget_cookie_create_request_header(const MGET_IRI *iri) { int it, init = 0; time_t now = time(NULL); mget_buffer_t buf; debug_printf("cookie_create_request_header for host=%s path=%s\n",iri->host,iri->path); pthread_mutex_lock(&cookies_mutex); for (it = 0; it < mget_vector_size(cookies); it++) { MGET_COOKIE *cookie = mget_vector_get(cookies, it); if (((!cookie->host_only && domain_match(cookie->domain, iri->host)) || (cookie->host_only && !strcasecmp(cookie->domain, iri->host))) && (!cookie->expires || cookie->expires >= now) && (!cookie->secure_only || (cookie->secure_only && iri->scheme == IRI_SCHEME_HTTPS)) && path_match(cookie->path, iri->path)) { if (!init) { mget_buffer_init(&buf, NULL, 128); init = 1; } if (buf.length) mget_buffer_printf_append2(&buf, "; %s=%s", cookie->name, cookie->value); else mget_buffer_printf_append2(&buf, "%s=%s", cookie->name, cookie->value); } } pthread_mutex_unlock(&cookies_mutex); return init ? buf.data : NULL; }
/*
 * Run mget_cookie_normalize_cookie() on every element of 'cookies',
 * using 'iri' as the origin of the cookies.
 *
 * The per-cookie return values are ignored.
 */
void mget_cookie_normalize_cookies(const MGET_IRI *iri, const MGET_VECTOR *cookies)
{
	int pos = 0;

	while (pos < mget_vector_size(cookies)) {
		mget_cookie_normalize_cookie(iri, mget_vector_get(cookies, pos));
		pos++;
	}
}
/*
 * Release a job and remove it from the job queue.
 *
 * A NULL job is ignored.
 *
 * If the job carries deferred IRIs (automatic robots.txt jobs), the robot
 * job reference of its host is cleared, the job's own IRI is freed and a new
 * job is queued for every deferred IRI before the job itself goes away.
 *
 * The queue is modified while holding 'mutex'.
 * NOTE(review): 'job' is still passed to mget_list_remove() after job_free();
 * presumably job_free() only releases the job's members - confirm.
 */
void queue_del(JOB *job)
{
	if (!job)
		return;

	debug_printf("queue_del %p\n", (void *)job);

	// special handling for automatic robots.txt jobs
	if (job->deferred) {
		JOB new_job = { .iri = NULL };

		if (job->host)
			job->host->robot_job = NULL;
		mget_iri_free(&job->iri);

		// create a job for each deferred IRI
		for (int it = 0; it < mget_vector_size(job->deferred); it++) {
			new_job.iri = mget_vector_get(job->deferred, it);
			new_job.local_filename = get_local_filename(new_job.iri);
			queue_add_job(&new_job);
		}
	}

	job_free(job);

	mget_thread_mutex_lock(&mutex);
	mget_list_remove(&queue, job);
	mget_thread_mutex_unlock(&mutex);
}
/*
 * Run _mget_cookie_normalize_cookie() on every element of 'cookies',
 * using 'iri' as the origin of the cookies.
 *
 * The per-cookie return values are ignored.
 * The lock/unlock of _cookies_mutex around the loop is disabled in this
 * version (those calls are commented out in the code), so no locking
 * happens here.
 */
void mget_cookie_normalize_cookies(const mget_iri_t *iri, const mget_vector_t *cookies)
{
	int pos = 0;

	// mget_thread_mutex_lock(&_cookies_mutex);

	while (pos < mget_vector_size(cookies)) {
		_mget_cookie_normalize_cookie(iri, mget_vector_get(cookies, pos));
		pos++;
	}

	// mget_thread_mutex_unlock(&_cookies_mutex);
}
/*
 * Hand every cookie in 'cookies' to mget_cookie_store_cookie() and remove
 * it from the vector afterwards.
 *
 * The vector is walked from its last element down to the first, so each
 * removal only touches the current tail position.
 */
void mget_cookie_store_cookies(MGET_VECTOR *cookies)
{
	int pos = mget_vector_size(cookies);

	while (--pos >= 0) {
		MGET_COOKIE *entry = mget_vector_get(cookies, pos);

		mget_cookie_store_cookie(entry);
		mget_vector_remove(cookies, pos);
	}
}
/*
 * Write the cookies of 'cookie_db' to the text file 'fname'.
 *
 * The file starts with two comment lines, followed by one tab-separated
 * line per cookie:
 *   [#HttpOnly_][.]domain  TRUE|FALSE  path  TRUE|FALSE  expires  name  value
 * where the second field is "FALSE" for host-only cookies and the fourth
 * field is "TRUE" for secure-only cookies.
 *
 * Persistent cookies that expired before 'now' are skipped. Session
 * (non-persistent) cookies are written only if 'keep_session_cookies' is
 * non-zero. The cookie vector is read while holding cookie_db->mutex.
 *
 * Returns 0 on success, -1 if an argument is NULL, the file cannot be opened,
 * or writing/closing the file fails.
 *
 * errno is captured at the call that failed, so the value shown in the error
 * message is not overwritten by later library calls (e.g. fclose()).
 */
int mget_cookie_db_save(mget_cookie_db_t *cookie_db, const char *fname, int keep_session_cookies)
{
	FILE *fp;
	int it, ret = -1;
	int saved_errno = 0; // errno of the first failing I/O call, 0 if none
	time_t now = time(NULL);

	if (!cookie_db || !fname)
		return -1;

	info_printf(_("saving cookies to '%s'\n"), fname);

	if (!(fp = fopen(fname, "w"))) {
		saved_errno = errno;
		error_printf(_("Failed to open cookie file '%s' (%d)\n"), fname, saved_errno);
		return -1;
	}

	if (fputs("# HTTP cookie file\n", fp) == EOF)
		saved_errno = errno;
	if (fputs("#Generated by Mget " PACKAGE_VERSION ". Edit at your own risk.\n\n", fp) == EOF && !saved_errno)
		saved_errno = errno;

	mget_thread_mutex_lock(&cookie_db->mutex);

	// stop at the first stream error, the file is unusable from then on
	for (it = 0; it < mget_vector_size(cookie_db->cookies) && !ferror(fp); it++) {
		mget_cookie_t *cookie = mget_vector_get(cookie_db->cookies, it);

		if (cookie->persistent) {
			if (cookie->expires < now)
				continue; // expired
		} else if (!keep_session_cookies)
			continue; // session cookie, not requested

		if (fprintf(fp, "%s%s%s\t%s\t%s\t%s\t%"PRId64"\t%s\t%s\n",
			cookie->http_only ? "#HttpOnly_" : "",
			cookie->domain_dot ? "." : "", // compatibility, irrelevant since RFC 6265
			cookie->domain,
			cookie->host_only ? "FALSE" : "TRUE",
			cookie->path,
			cookie->secure_only ? "TRUE" : "FALSE",
			(int64_t)cookie->expires,
			cookie->name, cookie->value) < 0 && !saved_errno)
		{
			saved_errno = errno;
		}
	}

	mget_thread_mutex_unlock(&cookie_db->mutex);

	if (!ferror(fp))
		ret = 0;

	// fclose() flushes buffered data, so a failure here is a write failure too
	if (fclose(fp)) {
		if (!saved_errno)
			saved_errno = errno;
		ret = -1;
	}

	if (ret)
		error_printf(_("Failed to write to cookie file '%s' (%d)\n"), fname, saved_errno);

	return ret;
}
/*
 * Move every cookie from the vector 'cookies' into 'cookie_db'.
 *
 * Does nothing if 'cookie_db' is NULL. Otherwise the vector is walked from
 * its last element down to the first; each cookie is stored, detached from
 * the vector without invoking the vector's free handling, and then the
 * cookie structure itself is released.
 */
void mget_cookie_store_cookies(mget_cookie_db_t *cookie_db, mget_vector_t *cookies)
{
	int pos;

	if (!cookie_db)
		return;

	pos = mget_vector_size(cookies);

	while (--pos >= 0) {
		mget_cookie_t *entry = mget_vector_get(cookies, pos);

		// the database keeps a shallow copy of 'entry'
		mget_cookie_store_cookie(cookie_db, entry);

		// so only the outer structure is freed here, not what it points to
		mget_vector_remove_nofree(cookies, pos);
		xfree(entry);
	}
}
/*
 * (Re)build job->parts with one PART per metalink piece.
 *
 * Does nothing if 'job' is NULL or has no metalink. An existing parts vector
 * is cleared, otherwise a new one sized for the number of pieces is created.
 *
 * Each part gets a 1-based id, the running byte position, and a length equal
 * to the piece length - except when fewer bytes than a full piece remain of
 * metalink->size, in which case the remaining byte count is used.
 */
void job_create_parts(JOB *job)
{
	PART part;
	mget_metalink_t *metalink;
	ssize_t remaining;
	int idx;

	if (!job)
		return;

	metalink = job->metalink;
	if (!metalink)
		return;

	memset(&part, 0, sizeof(PART));

	// create space to hold enough parts
	if (job->parts)
		mget_vector_clear(job->parts);
	else
		job->parts = mget_vector_create(mget_vector_size(metalink->pieces), 4, NULL);

	remaining = metalink->size;

	for (idx = 0; idx < mget_vector_size(metalink->pieces); idx++) {
		mget_metalink_piece_t *piece = mget_vector_get(metalink->pieces, idx);

		// a full piece, unless less than that is left of the file
		part.length = piece->length;
		if (remaining < piece->length)
			part.length = remaining;

		part.id = idx + 1;

		// the vector stores a copy of 'part', so it can be reused for the next piece
		mget_vector_add(job->parts, &part, sizeof(PART));

		part.position += part.length;
		remaining -= piece->length;
	}
}
static void test_parse_challenge(void) { static const struct test_data { const char * input; const char * scheme[3]; } test_data[] = { { // simplebasic "Basic realm=\"foo\"", { "Basic", NULL } }, { // simplebasicucase "BASIC REALM=\"foo\"", { "Basic", NULL } }, { // simplebasicucase "Basic , realm=\"foo\"", { "Basic", NULL } }, { // "Basic realm=\"test realm\"", { "Basic", NULL } }, { // "Basic realm=\"test-äöÜ\"", { "Basic", NULL } }, { // "Basic realm=\"basic\", Newauth realm=\"newauth\"", { "Basic", "Newauth", NULL } }, }; mget_vector_t *challenges; mget_http_challenge_t *challenge; // Testcases found here http://greenbytes.de/tech/tc/httpauth/ challenges = mget_vector_create(2, 2, NULL); mget_vector_set_destructor(challenges, (void(*)(void *))mget_http_free_challenge); for (unsigned it = 0; it < countof(test_data); it++) { const struct test_data *t = &test_data[it]; mget_http_parse_challenges(t->input, challenges); for (unsigned nchal = 0; nchal < countof(test_data[0].scheme) && t->scheme[nchal]; nchal++) { challenge = mget_vector_get(challenges, nchal); if (!t->scheme[nchal]) { if (challenge) { failed++; info_printf("Failed [%u]: mget_http_parse_challenges(%s) found %d challenges (expected %d)\n", it, t->input, mget_vector_size(challenges), nchal); } break; } if (!challenge) { failed++; info_printf("Failed [%u]: mget_http_parse_challenges(%s) did not find enough challenges\n", it, t->input); break; } if (!mget_strcasecmp_ascii(challenge->auth_scheme, t->scheme[nchal])) { ok++; } else { failed++; info_printf("Failed [%u]: mget_http_parse_challenges(%s) -> '%s' (expected '%s')\n", it, t->input, challenge->auth_scheme, t->scheme[nchal]); } } mget_vector_clear(challenges); } mget_http_free_challenges(&challenges); }
// normalize/sanitize and store cookies int mget_cookie_normalize_cookie(const MGET_IRI *iri, MGET_COOKIE *cookie) { /* log_printf("normalize cookie %s=%s\n", cookie->name, cookie->value); log_printf("< %s=%s\n", cookie->name, cookie->value); log_printf("< expires=%ld max-age=%ld\n", cookie->expires, cookie->maxage); log_printf("< domain=%s\n", cookie->domain); log_printf("< path=%s\n", cookie->path); log_printf("< normalized=%d persistent=%d hostonly=%d secure=%d httponly=%d\n", cookie->normalized, cookie->persistent, cookie->host_only, cookie->secure_only, cookie->http_only); */ cookie->normalized = 0; if (cookie->maxage) cookie->expires = cookie->maxage; cookie->persistent = !!cookie->expires; if (cookie->domain) { char *p; // convert domain to lowercase for (p = (char *)cookie->domain; *p; p++) if (isupper(*p)) *p = tolower(*p); } if (iri) { // cookies comes from a HTTP header and needs checking if (!cookie->domain) cookie->domain = strdup(""); // respect http://publicsuffix.org/list/ to avoid "supercookies" if (mget_vector_size(suffixes) > 0 && mget_cookie_suffix_match(cookie->domain)) { info_printf("Supercookie %s not accepted\n", cookie->domain); return 0; } if (*cookie->domain) { if (domain_match(cookie->domain, iri->host)) { cookie->host_only = 0; } else { debug_printf("Domain mismatch: %s %s\n", cookie->domain, iri->host); return 0; // ignore cookie } } else { xfree(cookie->domain); cookie->domain = strdup(iri->host); cookie->host_only = 1; } if (!cookie->path || *cookie->path != '/') { const char *p = iri->path ? 
strrchr(iri->path, '/') : NULL; if (p && p != iri->path) { cookie->path = strndup(iri->path, p - iri->path); } else { cookie->path = strdup("/"); // err_printf(_("Unexpected URI without '/': %s\n"), iri->path); // return 0; // ignore cookie } } } cookie->normalized = 1; /* log_printf("> %s=%s\n", cookie->name, cookie->value); log_printf("> expires=%ld max-age=%ld\n", cookie->expires, cookie->maxage); log_printf("> domain=%s\n", cookie->domain); log_printf("> path=%s\n", cookie->path); log_printf("> normalized=%d persistent=%d hostonly=%d secure=%d httponly=%d\n", cookie->normalized, cookie->persistent, cookie->host_only, cookie->secure_only, cookie->http_only); */ return 1; }
/*
 * Check an existing local file against the metalink data of 'job' and
 * (re)build job->parts with the pieces that still have to be downloaded.
 *
 * Steps:
 *   - without any file hashes (multipart non-metalink download), a file that
 *     already has the expected size is accepted as is,
 *   - a file larger than the expected size is truncated to that size,
 *   - if the file can be opened, the file hashes are tried in order until
 *     one can be checked; a matching checksum or a checksum that could not
 *     be built both count as "file is OK",
 *   - on a bad file checksum every piece is checked and each piece that does
 *     not verify is added to job->parts,
 *   - if the file cannot be opened, all pieces are added to job->parts.
 *
 * Returns 1 if the file is considered complete, 0 otherwise (also when 'job'
 * is NULL or has no metalink).
 *
 * The file descriptor is closed on every path that leaves the function
 * after a successful open().
 */
int job_validate_file(JOB *job)
{
	PART part;
	mget_metalink_t *metalink;
	off_t fsize;
	int fd, rc = -1, it;
	struct stat st;

	if (!job || !(metalink = job->metalink))
		return 0;

	memset(&part, 0, sizeof(PART));

	// create space to hold enough parts
	if (!job->parts)
		job->parts = mget_vector_create(mget_vector_size(metalink->pieces), 4, NULL);
	else
		mget_vector_clear(job->parts);

	fsize = metalink->size;

	if (mget_vector_size(metalink->hashes) == 0) {
		// multipart non-metalink download: do not clobber if file has expected size
		if (stat(metalink->name, &st) == 0 && st.st_size == fsize) {
			return 1; // we are done
		}
	}

	// truncate file if needed
	if (stat(metalink->name, &st) == 0 && st.st_size > fsize) {
		if (truncate(metalink->name, fsize) == -1)
			error_printf(_("Failed to truncate %s\n from %llu to %llu bytes\n"),
				metalink->name, (unsigned long long)st.st_size, (unsigned long long)fsize);
	}

	if ((fd = open(metalink->name, O_RDONLY)) != -1) {
		// file exists, check which piece is invalid and requeue it
		for (it = 0; errno != EINTR && it < mget_vector_size(metalink->hashes); it++) {
			mget_metalink_hash_t *hash = mget_vector_get(metalink->hashes, it);

			if ((rc = check_file_fd(hash, fd)) == -1)
				continue; // hash type not available, try next

			break;
		}

		if (rc == 1) {
			info_printf(_("Checksum OK for '%s'\n"), metalink->name);
			close(fd); // do not leak the descriptor on the early return
			return 1; // we are done
		} else if (rc == -1) {
			// failed to check file, continue as if file is ok
			info_printf(_("Failed to build checksum, assuming file to be OK\n"));
			close(fd); // do not leak the descriptor on the early return
			return 1; // we are done
		} else
			info_printf(_("Bad checksum for '%s'\n"), metalink->name);

		// walk all pieces, 'part.position' tracks the byte offset of the current piece
		for (it = 0; errno != EINTR && it < mget_vector_size(metalink->pieces); it++) {
			mget_metalink_piece_t *piece = mget_vector_get(metalink->pieces, it);
			mget_metalink_hash_t *hash = &piece->hash;

			// a full piece, unless less than that is left of the file
			if (fsize >= piece->length) {
				part.length = piece->length;
			} else {
				part.length = (size_t)fsize;
			}

			part.id = it + 1;

			if ((rc = check_piece_hash(hash, fd, part.position, part.length)) != 1) {
				info_printf(_("Piece %d/%d not OK - requeuing\n"), it + 1, mget_vector_size(metalink->pieces));
				mget_vector_add(job->parts, &part, sizeof(PART));
				debug_printf(" need to download %llu bytes from pos=%llu\n",
					(unsigned long long)part.length, (unsigned long long)part.position);
			}

			part.position += part.length;
			fsize -= piece->length;
		}

		close(fd);
	} else {
		// no local file that can be read: every piece has to be downloaded
		for (it = 0; it < mget_vector_size(metalink->pieces); it++) {
			mget_metalink_piece_t *piece = mget_vector_get(metalink->pieces, it);

			if (fsize >= piece->length) {
				part.length = piece->length;
			} else {
				part.length = fsize;
			}

			part.id = it + 1;

			mget_vector_add(job->parts, &part, sizeof(PART));

			part.position += part.length;
			fsize -= piece->length;
		}
	}

	return 0;
}