int mget_cookie_load_public_suffixes(const char *fname) { PUBLIC_SUFFIX suffix, *suffixp; FILE *fp; int nsuffixes = 0; char *buf = NULL, *linep, *p; size_t bufsize = 0; ssize_t buflen; // as of 02.11.2012, the list at http://publicsuffix.org/list/ contains ~6000 rules // and 40 exceptions. if (!suffixes) suffixes = mget_vector_create(6*1024, -2, (int(*)(const void *, const void *))suffix_compare); if (!suffix_exceptions) suffix_exceptions = mget_vector_create(64, -2, (int(*)(const void *, const void *))suffix_compare); if ((fp = fopen(fname, "r"))) { while ((buflen = mget_getline(&buf, &bufsize, fp)) >= 0) { linep = buf; while (isspace(*linep)) linep++; // ignore leading whitespace if (!*linep) continue; // skip empty lines if (*linep == '/' && linep[1] == '/') continue; // skip comments // parse suffix rule for (p = linep; *linep && !isspace(*linep);) linep++; *linep = 0; if (*p == '!') { // add to exceptions suffix_init(&suffix, p + 1, linep - p - 1); suffixp = mget_vector_get(suffix_exceptions, mget_vector_add(suffix_exceptions, &suffix, sizeof(suffix))); } else { suffix_init(&suffix, p, linep - p); suffixp = mget_vector_get(suffixes, mget_vector_add(suffixes, &suffix, sizeof(suffix))); } if (suffixp) suffixp->label = suffixp->label_buf; // set label to changed address nsuffixes++;; } xfree(buf); fclose(fp); mget_vector_sort(suffix_exceptions); mget_vector_sort(suffixes); } else error_printf(_("Failed to open public suffix file '%s'\n"), fname); return nsuffixes; }
int mget_cookie_suffix_match(const char *domain) { PUBLIC_SUFFIX suffix, *rule; const char *p, *label_bak; unsigned short length_bak; // this function should be called without leading dots, just make shure suffix.label = domain + (*domain == '.'); suffix.length = strlen(suffix.label); suffix.wildcard = 0; suffix.nlabels = 1; for (p = suffix.label; *p; p++) if (*p == '.') suffix.nlabels++; // if domain has enough labels, it won't match rule = mget_vector_get(suffixes, 0); if (!rule || rule->nlabels < suffix.nlabels - 1) return 0; rule = mget_vector_get(suffixes, mget_vector_find(suffixes, &suffix)); if (rule) { // definitely a match, no matter if the found rule is a wildcard or not return 1; } label_bak = suffix.label; length_bak = suffix.length; if ((suffix.label = strchr(suffix.label, '.'))) { suffix.label++; suffix.length = strlen(suffix.label); suffix.nlabels--; rule = mget_vector_get(suffixes, mget_vector_find(suffixes, &suffix)); if (rule) { if (rule->wildcard) { // now that we matched a wildcard, we have to check for an exception suffix.label = label_bak; suffix.length = length_bak; suffix.nlabels++; rule = mget_vector_get(suffix_exceptions, mget_vector_find(suffix_exceptions, &suffix)); if (rule) return 0; return 1; } } } return 0; }
char *mget_cookie_create_request_header(const MGET_IRI *iri) { int it, init = 0; time_t now = time(NULL); mget_buffer_t buf; debug_printf("cookie_create_request_header for host=%s path=%s\n",iri->host,iri->path); pthread_mutex_lock(&cookies_mutex); for (it = 0; it < mget_vector_size(cookies); it++) { MGET_COOKIE *cookie = mget_vector_get(cookies, it); if (((!cookie->host_only && domain_match(cookie->domain, iri->host)) || (cookie->host_only && !strcasecmp(cookie->domain, iri->host))) && (!cookie->expires || cookie->expires >= now) && (!cookie->secure_only || (cookie->secure_only && iri->scheme == IRI_SCHEME_HTTPS)) && path_match(cookie->path, iri->path)) { if (!init) { mget_buffer_init(&buf, NULL, 128); init = 1; } if (buf.length) mget_buffer_printf_append2(&buf, "; %s=%s", cookie->name, cookie->value); else mget_buffer_printf_append2(&buf, "%s=%s", cookie->name, cookie->value); } } pthread_mutex_unlock(&cookies_mutex); return init ? buf.data : NULL; }
/**
 * Put `cookie` into the global cookie vector.
 *
 * Cookies that are not normalized are ignored. The vector is created on the
 * first call. If the vector already has an entry that mget_vector_find()
 * reports for this cookie, the old entry's creation time is carried over,
 * the old entry is freed and its slot is overwritten; otherwise the cookie
 * is inserted in sorted position. The store is guarded by cookies_mutex.
 *
 * @param cookie  cookie to store; sizeof(*cookie) bytes are handed to the vector
 */
void mget_cookie_store_cookie(MGET_COOKIE *cookie)
{
	MGET_COOKIE *existing = NULL;
	int index = -1;

	debug_printf("got cookie %s=%s\n", cookie->name, cookie->value);

	if (!cookie->normalized)
		return;

	pthread_mutex_lock(&cookies_mutex);

	if (cookies) {
		index = mget_vector_find(cookies, cookie);
		existing = mget_vector_get(cookies, index);
	} else {
		// first cookie ever: nothing to look up yet
		cookies = mget_vector_create(128, -2, (int(*)(const void *, const void *))compare_cookie);
	}

	if (!existing) {
		debug_printf("store new cookie %s=%s\n", cookie->name, cookie->value);
		mget_vector_insert_sorted(cookies, cookie, sizeof(*cookie));
	} else {
		debug_printf("replace old cookie %s=%s\n", cookie->name, cookie->value);
		cookie->creation = existing->creation;
		mget_cookie_free_cookie(existing);
		mget_vector_replace(cookies, cookie, sizeof(*cookie), index);
	}

	pthread_mutex_unlock(&cookies_mutex);
}
/**
 * Run mget_cookie_normalize_cookie() on every element of `cookies`.
 *
 * @param iri      IRI passed through to the per-cookie normalizer
 * @param cookies  vector of cookies, visited from first to last
 */
void mget_cookie_normalize_cookies(const MGET_IRI *iri, const MGET_VECTOR *cookies)
{
	int idx = 0;

	while (idx < mget_vector_size(cookies)) {
		mget_cookie_normalize_cookie(iri, mget_vector_get(cookies, idx));
		idx++;
	}
}
/**
 * Release `job` and remove it from the job queue.
 *
 * If the job carries a vector of deferred IRIs, the host's `robot_job` link
 * (if there is a host) is cleared, the job's own IRI is freed and one new
 * job is queued per deferred IRI. The queue itself is modified under `mutex`.
 *
 * @param job  job to delete; NULL is ignored
 */
void queue_del(JOB *job)
{
	if (!job)
		return;

	debug_printf("queue_del %p\n", (void *)job);

	// special handling for automatic robots.txt jobs
	if (job->deferred) {
		JOB new_job = { .iri = NULL };

		if (job->host)
			job->host->robot_job = NULL;

		mget_iri_free(&job->iri);

		// create a job for each deferred IRI
		for (int it = 0; it < mget_vector_size(job->deferred); it++) {
			new_job.iri = mget_vector_get(job->deferred, it);
			new_job.local_filename = get_local_filename(new_job.iri);
			queue_add_job(&new_job);
		}
	}

	job_free(job);

	mget_thread_mutex_lock(&mutex);
	mget_list_remove(&queue, job);
	mget_thread_mutex_unlock(&mutex);
}
/**
 * Run _mget_cookie_normalize_cookie() on every element of `cookies`.
 *
 * No locking is done here.
 *
 * @param iri      IRI passed through to the per-cookie normalizer
 * @param cookies  vector of cookies, visited from first to last
 */
void mget_cookie_normalize_cookies(const mget_iri_t *iri, const mget_vector_t *cookies)
{
	int idx = 0;

	while (idx < mget_vector_size(cookies)) {
		_mget_cookie_normalize_cookie(iri, mget_vector_get(cookies, idx));
		idx++;
	}
}
/**
 * Move all cookies from `cookies` into the global cookie store.
 *
 * The vector is walked from its last element to its first; each element is
 * passed to mget_cookie_store_cookie() and then removed from `cookies`.
 *
 * @param cookies  vector of cookies to store
 */
void mget_cookie_store_cookies(MGET_VECTOR *cookies)
{
	int idx = mget_vector_size(cookies);

	// back to front, so removing an element never shifts the ones still to visit
	while (--idx >= 0) {
		MGET_COOKIE *cookie = mget_vector_get(cookies, idx);

		mget_cookie_store_cookie(cookie);
		mget_vector_remove(cookies, idx);
	}
}
/**
 * Write the cookies of `cookie_db` to the text file `fname`.
 *
 * The file starts with two '#' header lines, followed by one tab-separated
 * line per cookie: [#HttpOnly_][.]domain, TRUE/FALSE for "not host-only",
 * path, TRUE/FALSE for secure-only, expiry time, name, value.
 * Persistent cookies that have already expired are skipped; non-persistent
 * (session) cookies are written only if `keep_session_cookies` is non-zero.
 * The cookie vector is read under cookie_db->mutex.
 *
 * @param cookie_db             cookie database to save
 * @param fname                 output file, truncated/created with mode "w"
 * @param keep_session_cookies  non-zero to also write session cookies
 * @return 0 on success, -1 on NULL arguments or any open/write/close failure
 */
int mget_cookie_db_save(mget_cookie_db_t *cookie_db, const char *fname, int keep_session_cookies)
{
	FILE *fp;
	int it, write_failed, saved_errno;
	time_t now = time(NULL);

	if (!cookie_db || !fname)
		return -1;

	info_printf(_("saving cookies to '%s'\n"), fname);

	if (!(fp = fopen(fname, "w"))) {
		// capture errno right away, before any other call can overwrite it
		saved_errno = errno;
		error_printf(_("Failed to open cookie file '%s' (%d)\n"), fname, saved_errno);
		return -1;
	}

	fputs("# HTTP cookie file\n", fp);
	fputs("#Generated by Mget " PACKAGE_VERSION ". Edit at your own risk.\n\n", fp);

	mget_thread_mutex_lock(&cookie_db->mutex);

	// stop as soon as the stream reports an error
	for (it = 0; it < mget_vector_size(cookie_db->cookies) && !ferror(fp); it++) {
		mget_cookie_t *cookie = mget_vector_get(cookie_db->cookies, it);

		if (cookie->persistent) {
			if (cookie->expires < now)
				continue;
		} else if (!keep_session_cookies)
			continue;

		fprintf(fp, "%s%s%s\t%s\t%s\t%s\t%"PRId64"\t%s\t%s\n",
			cookie->http_only ? "#HttpOnly_" : "",
			cookie->domain_dot ? "." : "", // compatibility, irrelevant since RFC 6265
			cookie->domain,
			cookie->host_only ? "FALSE" : "TRUE",
			cookie->path,
			cookie->secure_only ? "TRUE" : "FALSE",
			(int64_t)cookie->expires,
			cookie->name, cookie->value);
	}

	// remember the write status and its errno before unlock/fclose can touch errno
	write_failed = ferror(fp) != 0;
	saved_errno = errno;

	mget_thread_mutex_unlock(&cookie_db->mutex);

	if (fclose(fp)) {
		// report the close error only if no earlier write error was recorded
		if (!write_failed)
			saved_errno = errno;
		write_failed = 1;
	}

	if (write_failed) {
		error_printf(_("Failed to write to cookie file '%s' (%d)\n"), fname, saved_errno);
		return -1;
	}

	return 0;
}
/**
 * Move all cookies from `cookies` into `cookie_db`.
 *
 * Does nothing when `cookie_db` is NULL. Otherwise the vector is walked from
 * its last element to its first: each cookie is passed to
 * mget_cookie_store_cookie(), taken out of `cookies` without running the
 * vector's free routine, and its struct is then released with xfree().
 *
 * @param cookie_db  destination cookie database (may be NULL)
 * @param cookies    vector of cookies to move
 */
void mget_cookie_store_cookies(mget_cookie_db_t *cookie_db, mget_vector_t *cookies)
{
	int idx;

	if (!cookie_db)
		return;

	idx = mget_vector_size(cookies);

	while (idx-- > 0) {
		mget_cookie_t *cookie = mget_vector_get(cookies, idx);

		// the database keeps a shallow copy of 'cookie' ...
		mget_cookie_store_cookie(cookie_db, cookie);

		// ... so only the container struct itself is released here
		mget_vector_remove_nofree(cookies, idx);
		xfree(cookie);
	}
}
/**
 * Store `cookie` in `cookie_db`.
 *
 * The cookie is rejected (deinitialized, -1 returned) when `cookie_db` is
 * NULL, when the cookie is not normalized, or when mget_cookie_check_psl()
 * returns non-zero for it. Otherwise, under cookie_db->mutex, an entry that
 * mget_vector_find() reports for this cookie is replaced (keeping the old
 * creation time), or the cookie is inserted in sorted position.
 *
 * @param cookie_db  destination cookie database (may be NULL)
 * @param cookie     cookie to store; sizeof(*cookie) bytes are handed to the vector
 * @return 0 if the cookie was stored, -1 if it was rejected
 */
int mget_cookie_store_cookie(mget_cookie_db_t *cookie_db, mget_cookie_t *cookie)
{
	mget_cookie_t *previous;
	int index;

	if (!cookie_db)
		goto reject;

	debug_printf("got cookie %s=%s\n", cookie->name, cookie->value);

	if (!cookie->normalized)
		goto reject;

	if (mget_cookie_check_psl(cookie_db, cookie) != 0) {
		debug_printf("cookie '%s' dropped, domain '%s' is a public suffix\n", cookie->name, cookie->domain);
		goto reject;
	}

	mget_thread_mutex_lock(&cookie_db->mutex);

	index = mget_vector_find(cookie_db->cookies, cookie);
	previous = mget_vector_get(cookie_db->cookies, index);

	if (!previous) {
		debug_printf("store new cookie %s=%s\n", cookie->name, cookie->value);
		mget_vector_insert_sorted(cookie_db->cookies, cookie, sizeof(*cookie));
	} else {
		debug_printf("replace old cookie %s=%s\n", cookie->name, cookie->value);
		cookie->creation = previous->creation;
		mget_vector_replace(cookie_db->cookies, cookie, sizeof(*cookie), index);
	}

	mget_thread_mutex_unlock(&cookie_db->mutex);

	return 0;

reject:
	// common exit for every rejected cookie
	mget_cookie_deinit(cookie);
	return -1;
}
static void test_vector(void) { struct ENTRY *tmp, txt_sorted[5] = { {""}, {"four"}, {"one"}, {"three"}, {"two"} }, *txt[countof(txt_sorted)]; mget_vector_t *v = mget_vector_create(2, -2, (int(*)(const void *, const void *))compare_txt); unsigned it; int n; // copy for (it = 0; it < countof(txt); it++) txt[it] = &txt_sorted[it]; // shuffle txt for (it = 0; it < countof(txt); it++) { n = rand() % countof(txt); tmp = txt[n]; txt[n] = txt[it]; txt[it] = tmp; } for (it = 0; it < countof(txt); it++) { mget_vector_insert_sorted(v, txt[it], sizeof(struct ENTRY)); } for (it = 0; it < countof(txt); it++) { struct ENTRY *e = mget_vector_get(v, it); if (!strcmp(e->txt,txt_sorted[it].txt)) ok++; else failed++; } mget_vector_free(&v); }
/**
 * (Re)build job->parts with one PART per metalink piece.
 *
 * Does nothing if `job` is NULL or has no metalink. The parts vector is
 * created if missing, otherwise cleared. Parts get ids starting at 1 and
 * consecutive positions starting at 0; a part's length is the piece length,
 * or the remaining file size when that is smaller.
 *
 * @param job  job whose parts vector is to be filled
 */
void job_create_parts(JOB *job)
{
	mget_metalink_t *metalink;
	PART part;
	ssize_t remaining;
	int idx;

	if (!job)
		return;

	metalink = job->metalink;
	if (!metalink)
		return;

	memset(&part, 0, sizeof(PART));

	// make sure there is an empty vector with room for all parts
	if (job->parts)
		mget_vector_clear(job->parts);
	else
		job->parts = mget_vector_create(mget_vector_size(metalink->pieces), 4, NULL);

	remaining = metalink->size;

	for (idx = 0; idx < mget_vector_size(metalink->pieces); idx++) {
		mget_metalink_piece_t *piece = mget_vector_get(metalink->pieces, idx);

		// the last piece may be shorter than the nominal piece length
		if (remaining < piece->length)
			part.length = remaining;
		else
			part.length = piece->length;

		part.id = idx + 1;

		mget_vector_add(job->parts, &part, sizeof(PART));

		part.position += part.length;
		remaining -= piece->length;
	}
}
static void test_stringmap(void) { mget_stringmap_t *m; char key[128], value[128], *val; int run, it; size_t valuesize; // the initial size of 16 forces the internal reshashing function to be called twice m = mget_stringmap_create(16); for (run = 0; run < 2; run++) { if (run) { mget_stringmap_clear(m); mget_stringmap_sethashfunc(m, hash_txt); } for (it = 0; it < 26; it++) { sprintf(key, "http://www.example.com/subdir/%d.html", it); valuesize = sprintf(value, "%d.html", it); if (mget_stringmap_put(m, key, value, valuesize + 1)) { failed++; info_printf("stringmap_put(%s) returns unexpected old value\n", key); } else ok++; } if ((it = mget_stringmap_size(m)) != 26) { failed++; info_printf("stringmap_size() returned %d (expected %d)\n", it, 26); } else ok++; // now, look up every single entry for (it = 0; it < 26; it++) { sprintf(key, "http://www.example.com/subdir/%d.html", it); sprintf(value, "%d.html", it); if (!(val = mget_stringmap_get(m, key))) { failed++; info_printf("stringmap_get(%s) didn't find entry\n", key); } else if (strcmp(val, value)) { failed++; info_printf("stringmap_get(%s) found '%s' (expected '%s')\n", key, val, value); } else ok++; } mget_stringmap_clear(m); if ((it = mget_stringmap_size(m)) != 0) { failed++; info_printf("stringmap_size() returned %d (expected 0)\n", it); } else ok++; for (it = 0; it < 26; it++) { sprintf(key, "http://www.example.com/subdir/%d.html", it); valuesize = sprintf(value, "%d.html", it); if (mget_stringmap_put(m, key, value, valuesize + 1)) { failed++; info_printf("stringmap_put(%s) returns unexpected old value\n", key); } else ok++; } if ((it = mget_stringmap_size(m)) != 26) { failed++; info_printf("stringmap_size() returned %d (expected %d)\n", it, 26); } else ok++; // now, remove every single entry for (it = 0; it < 26; it++) { sprintf(key, "http://www.example.com/subdir/%d.html", it); sprintf(value, "%d.html", it); mget_stringmap_remove(m, key); } if ((it = mget_stringmap_size(m)) != 0) { failed++; 
info_printf("stringmap_size() returned %d (expected 0)\n", it); } else ok++; for (it = 0; it < 26; it++) { sprintf(key, "http://www.example.com/subdir/%d.html", it); valuesize = sprintf(value, "%d.html", it); if (mget_stringmap_put(m, key, value, valuesize + 1)) { failed++; info_printf("stringmap_put(%s) returns unexpected old value\n", key); } else ok++; } if ((it = mget_stringmap_size(m)) != 26) { failed++; info_printf("stringmap_size() returned %d (expected %d)\n", it, 26); } else ok++; } // testing alloc/free in stringmap/hashmap mget_stringmap_clear(m); mget_stringmap_put(m, "thekey", NULL, 0) ? failed++ : ok++; mget_stringmap_put(m, "thekey", NULL, 0) ? ok++ : failed++; mget_stringmap_put(m, "thekey", "thevalue", 9) ? ok++ : failed++; mget_stringmap_put(m, "thekey", "thevalue", 9) ? ok++ : failed++; mget_stringmap_put(m, "thekey", NULL, 0) ? ok++ : failed++; // testing key/value identity alloc/free in stringmap/hashmap mget_stringmap_clear(m); mget_stringmap_put(m, "thekey", NULL, 0) ? failed++ : ok++; mget_stringmap_put(m, "thekey", NULL, 0) ? ok++ : failed++; mget_stringmap_put(m, "thekey", "thevalue", 9) ? ok++ : failed++; mget_stringmap_put(m, "thekey", NULL, 0) ? 
ok++ : failed++; mget_stringmap_free(&m); mget_http_challenge_t challenge; mget_http_parse_challenge("Basic realm=\"test realm\"", &challenge); mget_http_free_challenge(&challenge); mget_vector_t *challenges; challenges = mget_vector_create(2, 2, NULL); mget_vector_set_destructor(challenges, (void(*)(void *))mget_http_free_challenge); mget_http_parse_challenge("Basic realm=\"test realm\"", &challenge); mget_vector_add(challenges, &challenge, sizeof(challenge)); mget_http_free_challenges(&challenges); char *response_text = strdup( "HTTP/1.1 401 Authorization Required\r\n"\ "Date: Sun, 23 Dec 2012 21:03:45 GMT\r\n"\ "Server: Apache/2.2.22 (Debian)\r\n"\ "WWW-Authenticate: Digest realm=\"therealm\", nonce=\"Ip6MaovRBAA=c4af733c51270698260f5d357724c2cbce20fa3d\", algorithm=MD5, domain=\"/prot_digest_md5\", qop=\"auth\"\r\n"\ "Vary: Accept-Encoding\r\n"\ "Content-Length: 476\r\n"\ "Keep-Alive: timeout=5, max=99\r\n"\ "Connection: Keep-Alive\r\n"\ "Content-Type: text/html; charset=iso-8859-1\r\n\r\n"); mget_iri_t *iri = mget_iri_parse("http://localhost/prot_digest_md5/", NULL); mget_http_request_t *req = mget_http_create_request(iri, "GET"); mget_http_response_t *resp = mget_http_parse_response_header(response_text); mget_http_add_credentials(req, mget_vector_get(resp->challenges, 0), "tim", "123"); // for (it=0;it<vec_size(req->lines);it++) { // info_printf("%s\n", (char *)vec_get(req->lines, it)); // } mget_http_free_response(&resp); mget_http_free_request(&req); mget_iri_free(&iri); xfree(response_text); // Authorization: Digest username="******", realm="therealm", nonce="Ip6MaovRBAA=c4af733c51270698260f5d357724c2cbce20fa3d", uri="/prot_digest_md5/", response="a99e2012d507a73dd46eb044d3f4641c", qop=auth, nc=00000001, cnonce="3d20faa1" }
static void test_parse_challenge(void) { static const struct test_data { const char * input; const char * scheme[3]; } test_data[] = { { // simplebasic "Basic realm=\"foo\"", { "Basic", NULL } }, { // simplebasicucase "BASIC REALM=\"foo\"", { "Basic", NULL } }, { // simplebasicucase "Basic , realm=\"foo\"", { "Basic", NULL } }, { // "Basic realm=\"test realm\"", { "Basic", NULL } }, { // "Basic realm=\"test-äöÜ\"", { "Basic", NULL } }, { // "Basic realm=\"basic\", Newauth realm=\"newauth\"", { "Basic", "Newauth", NULL } }, }; mget_vector_t *challenges; mget_http_challenge_t *challenge; // Testcases found here http://greenbytes.de/tech/tc/httpauth/ challenges = mget_vector_create(2, 2, NULL); mget_vector_set_destructor(challenges, (void(*)(void *))mget_http_free_challenge); for (unsigned it = 0; it < countof(test_data); it++) { const struct test_data *t = &test_data[it]; mget_http_parse_challenges(t->input, challenges); for (unsigned nchal = 0; nchal < countof(test_data[0].scheme) && t->scheme[nchal]; nchal++) { challenge = mget_vector_get(challenges, nchal); if (!t->scheme[nchal]) { if (challenge) { failed++; info_printf("Failed [%u]: mget_http_parse_challenges(%s) found %d challenges (expected %d)\n", it, t->input, mget_vector_size(challenges), nchal); } break; } if (!challenge) { failed++; info_printf("Failed [%u]: mget_http_parse_challenges(%s) did not find enough challenges\n", it, t->input); break; } if (!mget_strcasecmp_ascii(challenge->auth_scheme, t->scheme[nchal])) { ok++; } else { failed++; info_printf("Failed [%u]: mget_http_parse_challenges(%s) -> '%s' (expected '%s')\n", it, t->input, challenge->auth_scheme, t->scheme[nchal]); } } mget_vector_clear(challenges); } mget_http_free_challenges(&challenges); }
/**
 * Check an existing local file against the job's metalink data and fill
 * job->parts with the parts that still have to be downloaded.
 *
 * - No metalink hashes and the file already has the expected size: done.
 * - A file larger than the expected size is truncated to that size.
 * - If the file can be opened, the whole-file hashes are tried in order
 *   until one can be evaluated. A good checksum, or no evaluable checksum at
 *   all, means done. On a bad checksum each piece is checked and every piece
 *   that is not OK is added to job->parts.
 * - If the file cannot be opened, all pieces are added to job->parts.
 *
 * @param job  job to validate
 * @return 1 if the file is considered complete, 0 otherwise (including a
 *         NULL job or a job without metalink)
 */
int job_validate_file(JOB *job)
{
	PART part;
	mget_metalink_t *metalink;
	off_t fsize;
	int fd, rc = -1, it;
	struct stat st;

	if (!job || !(metalink = job->metalink))
		return 0;

	memset(&part, 0, sizeof(PART));

	// create space to hold enough parts
	if (!job->parts)
		job->parts = mget_vector_create(mget_vector_size(metalink->pieces), 4, NULL);
	else
		mget_vector_clear(job->parts);

	fsize = metalink->size;

	if (mget_vector_size(metalink->hashes) == 0) {
		// multipart non-metalink download: do not clobber if file has expected size
		if (stat(metalink->name, &st) == 0 && st.st_size == fsize) {
			return 1; // we are done
		}
	}

	// truncate file if needed
	if (stat(metalink->name, &st) == 0 && st.st_size > fsize) {
		if (truncate(metalink->name, fsize) == -1)
			error_printf(_("Failed to truncate %s\n from %llu to %llu bytes\n"),
				metalink->name, (unsigned long long)st.st_size, (unsigned long long)fsize);
	}

	if ((fd = open(metalink->name, O_RDONLY)) != -1) {
		// file exists, check which piece is invalid and requeue it

		// NOTE(review): errno is not reset before these loops, so a stale
		// EINTR from an earlier call would skip them - confirm this is intended
		for (it = 0; errno != EINTR && it < mget_vector_size(metalink->hashes); it++) {
			mget_metalink_hash_t *hash = mget_vector_get(metalink->hashes, it);

			if ((rc = check_file_fd(hash, fd)) == -1)
				continue; // hash type not available, try next

			break;
		}

		if (rc == 1) {
			info_printf(_("Checksum OK for '%s'\n"), metalink->name);
			close(fd); // do not leak the descriptor on the early return
			return 1; // we are done
		}
		else if (rc == -1) {
			// failed to check file, continue as if file is ok
			info_printf(_("Failed to build checksum, assuming file to be OK\n"));
			close(fd); // do not leak the descriptor on the early return
			return 1; // we are done
		} else
			info_printf(_("Bad checksum for '%s'\n"), metalink->name);

		for (it = 0; errno != EINTR && it < mget_vector_size(metalink->pieces); it++) {
			mget_metalink_piece_t *piece = mget_vector_get(metalink->pieces, it);
			mget_metalink_hash_t *hash = &piece->hash;

			// the last piece may be shorter than the nominal piece length
			if (fsize >= piece->length) {
				part.length = piece->length;
			} else {
				part.length = (size_t)fsize;
			}

			part.id = it + 1;

			if ((rc = check_piece_hash(hash, fd, part.position, part.length)) != 1) {
				info_printf(_("Piece %d/%d not OK - requeuing\n"), it + 1, mget_vector_size(metalink->pieces));
				mget_vector_add(job->parts, &part, sizeof(PART));
				debug_printf(" need to download %llu bytes from pos=%llu\n",
					(unsigned long long)part.length, (unsigned long long)part.position);
			}

			part.position += part.length;
			fsize -= piece->length;
		}

		close(fd);
	} else {
		// no local file to check: every piece has to be downloaded
		for (it = 0; it < mget_vector_size(metalink->pieces); it++) {
			mget_metalink_piece_t *piece = mget_vector_get(metalink->pieces, it);

			if (fsize >= piece->length) {
				part.length = piece->length;
			} else {
				part.length = fsize;
			}

			part.id = it + 1;

			mget_vector_add(job->parts, &part, sizeof(PART));

			part.position += part.length;
			fsize -= piece->length;
		}
	}

	return 0;
}