/* Loop through all files in the Metalink structure and retrieve them.
   Returns RETROK if all files were downloaded.  Returns the last
   retrieval error (from retrieve_url) if some files could not be
   downloaded.  */
uerr_t
retrieve_from_metalink (const metalink_t *metalink)
{
  metalink_file_t **mfile_ptr;
  uerr_t last_retr_err = RETROK; /* Store last encountered retrieve error.  */

  FILE *_output_stream = output_stream;
  bool _output_stream_regular = output_stream_regular;
  char *_output_document = opt.output_document;

  DEBUGP (("Retrieving from Metalink\n"));

  /* No files to download.  */
  if (!metalink->files)
    return RETROK;

  if (opt.output_document)
    {
      /* We cannot support output_document, as we need to compute the
         checksum of the downloaded file and to remove it if the checksum
         is bad.  */
      logputs (LOG_NOTQUIET,
               _("-O not supported for metalink download. Ignoring.\n"));
    }

  for (mfile_ptr = metalink->files; *mfile_ptr; mfile_ptr++)
    {
      metalink_file_t *mfile = *mfile_ptr;
      metalink_resource_t **mres_ptr;
      char *filename = NULL;
      bool hash_ok = false;

      uerr_t retr_err = METALINK_MISSING_RESOURCE;

      /* -1 -> file should be rejected
          0 -> could not verify
          1 -> verified successfully  */
      char sig_status = 0;

      output_stream = NULL;

      DEBUGP (("Processing metalink file %s...\n", quote (mfile->name)));

      /* Resources are sorted by priority.  */
      for (mres_ptr = mfile->resources; *mres_ptr; mres_ptr++)
        {
          metalink_resource_t *mres = *mres_ptr;
          metalink_checksum_t **mchksum_ptr, *mchksum;
          struct iri *iri;
          struct url *url;
          int url_err;

          if (!RES_TYPE_SUPPORTED (mres->type))
            {
              logprintf (LOG_VERBOSE,
                         _("Resource type %s not supported, ignoring...\n"),
                         quote (mres->type));
              continue;
            }

          retr_err = METALINK_RETR_ERROR;

          /* If output_stream is not NULL, then we have failed on a previous
             resource and are retrying.  Thus, remove the file.  */
          if (output_stream)
            {
              fclose (output_stream);
              output_stream = NULL;
              if (unlink (filename))
                logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
              xfree (filename);
            }

          /* Parse our resource URL.  */
          iri = iri_new ();
          set_uri_encoding (iri, opt.locale, true);
          url = url_parse (mres->url, &url_err, iri, false);

          if (!url)
            {
              char *error = url_error (mres->url, url_err);
              logprintf (LOG_NOTQUIET, "%s: %s.\n", mres->url, error);
              xfree (error);
              inform_exit_status (URLERROR);
              iri_free (iri);
              continue;
            }
          else
            {
              /* Avoid recursive Metalink from HTTP headers.  */
              bool _metalink_http = opt.metalink_over_http;

              /* Assure a proper local file name regardless of the URL of the
                 particular Metalink resource.  To do that, we create the
                 local file here and put it as output_stream.  We restore the
                 original configuration after we are finished with the
                 file.  */
              output_stream = unique_create (mfile->name, true, &filename);
              output_stream_regular = true;

              /* Store the real file name for displaying in messages.  */
              opt.output_document = filename;

              opt.metalink_over_http = false;
              DEBUGP (("Storing to %s\n", filename));
              retr_err = retrieve_url (url, mres->url, NULL, NULL, NULL,
                                       NULL, opt.recursive, iri, false);
              opt.metalink_over_http = _metalink_http;
            }

          url_free (url);
          iri_free (iri);

          if (retr_err == RETROK)
            {
              FILE *local_file;

              /* Check the digest.  */
              local_file = fopen (filename, "rb");
              if (!local_file)
                {
                  logprintf (LOG_NOTQUIET,
                             _("Could not open downloaded file.\n"));
                  continue;
                }

              for (mchksum_ptr = mfile->checksums; *mchksum_ptr; mchksum_ptr++)
                {
                  char sha256[SHA256_DIGEST_SIZE];
                  char sha256_txt[2 * SHA256_DIGEST_SIZE + 1];

                  mchksum = *mchksum_ptr;

                  /* I have seen both variants...  */
                  if (strcasecmp (mchksum->type, "sha256")
                      && strcasecmp (mchksum->type, "sha-256"))
                    {
                      DEBUGP (("Ignoring unsupported checksum type %s.\n",
                               quote (mchksum->type)));
                      continue;
                    }

                  logprintf (LOG_VERBOSE, _("Computing checksum for %s\n"),
                             quote (mfile->name));

                  sha256_stream (local_file, sha256);
                  wg_hex_to_string (sha256_txt, sha256, SHA256_DIGEST_SIZE);

                  DEBUGP (("Declared hash: %s\n", mchksum->hash));
                  DEBUGP (("Computed hash: %s\n", sha256_txt));

                  if (!strcmp (sha256_txt, mchksum->hash))
                    {
                      logputs (LOG_VERBOSE, _("Checksum matches.\n"));
                      hash_ok = true;
                    }
                  else
                    {
                      logprintf (LOG_NOTQUIET,
                                 _("Checksum mismatch for file %s.\n"),
                                 quote (mfile->name));
                      hash_ok = false;
                    }

                  /* Stop as soon as we have checked the supported
                     checksum.  */
                  break;
                } /* Iterate over available checksums.  */

              fclose (local_file);
              local_file = NULL;

              if (!hash_ok)
                continue;

              sig_status = 0; /* Not verified.  */

#ifdef HAVE_GPGME
              /* Check the cryptographic signature.  Note that signatures
                 from Metalink/XML will not be parsed when using a
                 libmetalink version older than 0.1.3.  Metalink-over-HTTP
                 is not affected by this problem.  */
              if (mfile->signature)
                {
                  metalink_signature_t *msig = mfile->signature;
                  gpgme_error_t gpgerr;
                  gpgme_ctx_t gpgctx;
                  gpgme_data_t gpgsigdata, gpgdata;
                  gpgme_verify_result_t gpgres;
                  gpgme_signature_t gpgsig;
                  gpgme_protocol_t gpgprot = GPGME_PROTOCOL_UNKNOWN;
                  int fd = -1;

                  /* Initialize the library, as the name suggests.  */
                  gpgme_check_version (NULL);

                  /* Open the data file.  */
                  fd = open (filename, O_RDONLY);
                  if (fd == -1)
                    {
                      logputs (LOG_NOTQUIET,
                               _("Could not open downloaded file for signature "
                                 "verification.\n"));
                      goto gpg_skip_verification;
                    }

                  /* Assign the file descriptor to a GPG data structure.  */
                  gpgerr = gpgme_data_new_from_fd (&gpgdata, fd);
                  if (gpgerr != GPG_ERR_NO_ERROR)
                    {
                      logprintf (LOG_NOTQUIET, "GPGME data_new_from_fd: %s\n",
                                 gpgme_strerror (gpgerr));
                      goto gpg_skip_verification;
                    }

                  /* Prepare a new GPGME context.  */
                  gpgerr = gpgme_new (&gpgctx);
                  if (gpgerr != GPG_ERR_NO_ERROR)
                    {
                      logprintf (LOG_NOTQUIET, "GPGME new: %s\n",
                                 gpgme_strerror (gpgerr));
                      gpgme_data_release (gpgdata);
                      goto gpg_skip_verification;
                    }

                  DEBUGP (("Verifying signature %s:\n%s\n",
                           quote (msig->mediatype), msig->signature));

                  /* Check the signature type.  */
                  if (!strcmp (msig->mediatype, "application/pgp-signature"))
                    gpgprot = GPGME_PROTOCOL_OpenPGP;
                  else /* Unsupported signature type.  */
                    {
                      gpgme_release (gpgctx);
                      gpgme_data_release (gpgdata);
                      goto gpg_skip_verification;
                    }

                  gpgerr = gpgme_set_protocol (gpgctx, gpgprot);
                  if (gpgerr != GPG_ERR_NO_ERROR)
                    {
                      logprintf (LOG_NOTQUIET, "GPGME set_protocol: %s\n",
                                 gpgme_strerror (gpgerr));
                      gpgme_release (gpgctx);
                      gpgme_data_release (gpgdata);
                      goto gpg_skip_verification;
                    }

                  /* Load the signature.  */
                  gpgerr = gpgme_data_new_from_mem (&gpgsigdata,
                                                    msig->signature,
                                                    strlen (msig->signature),
                                                    0);
                  if (gpgerr != GPG_ERR_NO_ERROR)
                    {
                      logprintf (LOG_NOTQUIET, _("GPGME data_new_from_mem: %s\n"),
                                 gpgme_strerror (gpgerr));
                      gpgme_release (gpgctx);
                      gpgme_data_release (gpgdata);
                      goto gpg_skip_verification;
                    }

                  /* Verify the signature.  */
                  gpgerr = gpgme_op_verify (gpgctx, gpgsigdata, gpgdata, NULL);
                  if (gpgerr != GPG_ERR_NO_ERROR)
                    {
                      logprintf (LOG_NOTQUIET, _("GPGME op_verify: %s\n"),
                                 gpgme_strerror (gpgerr));
                      gpgme_data_release (gpgsigdata);
                      gpgme_release (gpgctx);
                      gpgme_data_release (gpgdata);
                      goto gpg_skip_verification;
                    }

                  /* Check the results.  */
                  gpgres = gpgme_op_verify_result (gpgctx);
                  if (!gpgres)
                    {
                      logputs (LOG_NOTQUIET, _("GPGME op_verify_result: NULL\n"));
                      gpgme_data_release (gpgsigdata);
                      gpgme_release (gpgctx);
                      gpgme_data_release (gpgdata);
                      goto gpg_skip_verification;
                    }

                  /* The list is null-terminated.  */
                  for (gpgsig = gpgres->signatures; gpgsig; gpgsig = gpgsig->next)
                    {
                      DEBUGP (("Checking signature %s\n", gpgsig->fpr));

                      if (gpgsig->summary
                          & (GPGME_SIGSUM_VALID | GPGME_SIGSUM_GREEN))
                        {
                          logputs (LOG_VERBOSE,
                                   _("Signature validation succeeded.\n"));
                          sig_status = 1;
                          break;
                        }

                      if (gpgsig->summary & GPGME_SIGSUM_RED)
                        {
                          logputs (LOG_NOTQUIET,
                                   _("Invalid signature. Rejecting resource.\n"));
                          sig_status = -1;
                          break;
                        }

                      if (gpgsig->summary == 0
                          && (gpgsig->status & 0xFFFF) == GPG_ERR_NO_ERROR)
                        {
                          logputs (LOG_VERBOSE,
                                   _("Data matches signature, but signature "
                                     "is not trusted.\n"));
                        }

                      if ((gpgsig->status & 0xFFFF) != GPG_ERR_NO_ERROR)
                        {
                          logprintf (LOG_NOTQUIET, "GPGME: %s\n",
                                     gpgme_strerror (gpgsig->status & 0xFFFF));
                        }
                    }

                  gpgme_data_release (gpgsigdata);
                  gpgme_release (gpgctx);
                  gpgme_data_release (gpgdata);

 gpg_skip_verification:
                  if (fd != -1)
                    close (fd);
                } /* endif (mfile->signature) */
#endif
              /* Stop if the file was downloaded successfully.  */
              if (sig_status >= 0)
                break;
            } /* endif RETR_OK.  */
        } /* Iterate over resources.  */

      if (retr_err != RETROK)
        {
          logprintf (LOG_VERBOSE, _("Failed to download %s. Skipping resource.\n"),
                     quote (mfile->name));
        }
      else if (!hash_ok)
        {
          retr_err = METALINK_CHKSUM_ERROR;
          logprintf (LOG_NOTQUIET,
                     _("File %s retrieved but checksum does not match. \n"),
                     quote (mfile->name));
        }
#ifdef HAVE_GPGME
      /* The signature is only validated if the hash check was successful.  */
      else if (sig_status < 0)
        {
          retr_err = METALINK_SIG_ERROR;
          logprintf (LOG_NOTQUIET,
                     _("File %s retrieved but signature does not match. \n"),
                     quote (mfile->name));
        }
#endif
      last_retr_err = retr_err == RETROK ? last_retr_err : retr_err;

      /* Remove the file if an error was encountered or if the option was
         specified.  Note: the file has been downloaded using *_loop.
         Therefore, it is not necessary to keep the file for a continued
         download.  */
      if ((retr_err != RETROK || opt.delete_after)
          && filename != NULL && file_exists_p (filename))
        {
          logprintf (LOG_VERBOSE, _("Removing %s.\n"), quote (filename));
          if (unlink (filename))
            logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
        }
      if (output_stream)
        {
          fclose (output_stream);
          output_stream = NULL;
        }
      xfree (filename);
    } /* Iterate over files.  */

  /* Restore the original values.  */
  opt.output_document = _output_document;
  output_stream_regular = _output_stream_regular;
  output_stream = _output_stream;

  return last_retr_err;
}
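/* Illustration only, not part of the original source: a minimal sketch of a
   caller that obtains a metalink_t and hands it to retrieve_from_metalink().
   metalink_parse_file() and metalink_delete() are assumed to come from
   libmetalink; the function name and the use of URLERROR as a parse-failure
   status are placeholders.  */
#if 0
static uerr_t
example_retrieve_metalink_file (const char *path)
{
  metalink_t *metalink = NULL;
  uerr_t err;

  /* Parse the .metalink/.meta4 description into a metalink_t structure.  */
  if (metalink_parse_file (path, &metalink) != 0 || !metalink)
    {
      logprintf (LOG_NOTQUIET, _("Could not parse Metalink file %s.\n"), path);
      return URLERROR;  /* placeholder status code */
    }

  /* Download every file it lists; checksums (and, with GPGME, signatures)
     are verified by retrieve_from_metalink() itself.  */
  err = retrieve_from_metalink (metalink);

  metalink_delete (metalink);
  return err;
}
#endif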
uerr_t
retrieve_url (struct url *orig_parsed, const char *origurl, char **file,
              char **newloc, const char *refurl, int *dt, bool recursive,
              struct iri *iri, bool register_status)
{
  uerr_t result;
  char *url;
  bool location_changed;
  bool iri_fallbacked = false;
  int dummy;
  char *mynewloc, *proxy;
  struct url *u = orig_parsed, *proxy_url;
  int up_error_code;            /* url parse error code */
  char *local_file;
  int redirection_count = 0;

  bool post_data_suspended = false;
  char *saved_post_data = NULL;
  char *saved_post_file_name = NULL;

  /* If dt is NULL, use local storage.  */
  if (!dt)
    {
      dt = &dummy;
      dummy = 0;
    }
  url = xstrdup (origurl);
  if (newloc)
    *newloc = NULL;
  if (file)
    *file = NULL;

  if (!refurl)
    refurl = opt.referer;

 redirected:
  /* (also reached when falling back from IRI/UTF-8 encoding) */

  result = NOCONERROR;
  mynewloc = NULL;
  local_file = NULL;
  proxy_url = NULL;

  proxy = getproxy (u);
  if (proxy)
    {
      struct iri *pi = iri_new ();
      set_uri_encoding (pi, opt.locale, true);
      pi->utf8_encode = false;

      /* Parse the proxy URL.  */
      proxy_url = url_parse (proxy, &up_error_code, NULL, true);
      if (!proxy_url)
        {
          char *error = url_error (proxy, up_error_code);
          logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
                     proxy, error);
          xfree (url);
          xfree (error);
          RESTORE_POST_DATA;
          result = PROXERR;
          goto bail;
        }
      if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
        {
          logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"),
                     proxy);
          url_free (proxy_url);
          xfree (url);
          RESTORE_POST_DATA;
          result = PROXERR;
          goto bail;
        }
    }

  if (u->scheme == SCHEME_HTTP
#ifdef HAVE_SSL
      || u->scheme == SCHEME_HTTPS
#endif
      || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
    {
      result = http_loop (u, orig_parsed, &mynewloc, &local_file, refurl, dt,
                          proxy_url, iri);
    }
  else if (u->scheme == SCHEME_FTP)
    {
      /* If this is a redirection, temporarily turn off opt.ftp_glob and
         opt.recursive, both being undesirable when following redirects.  */
      bool oldrec = recursive, glob = opt.ftp_glob;
      if (redirection_count)
        oldrec = glob = false;

      result = ftp_loop (u, &local_file, dt, proxy_url, recursive, glob);
      recursive = oldrec;

      /* There is a possibility of having HTTP being redirected to FTP.
         In these cases we must decide whether the text is HTML according
         to the suffix.  The HTML suffixes are `.html', `.htm' and a few
         others, case-insensitive.  */
      if (redirection_count && local_file && u->scheme == SCHEME_FTP)
        {
          if (has_html_suffix_p (local_file))
            *dt |= TEXTHTML;
        }
    }

  if (proxy_url)
    {
      url_free (proxy_url);
      proxy_url = NULL;
    }

  location_changed = (result == NEWLOCATION || result == NEWLOCATION_KEEP_POST);
  if (location_changed)
    {
      char *constructed_newloc;
      struct url *newloc_parsed;

      assert (mynewloc != NULL);

      if (local_file)
        xfree (local_file);

      /* The HTTP specs only allow absolute URLs to appear in redirects, but
         a ton of boneheaded webservers and CGIs out there break the rules
         and use relative URLs, and popular browsers are lenient about this,
         so wget should be too.  */
      constructed_newloc = uri_merge (url, mynewloc);
      xfree (mynewloc);
      mynewloc = constructed_newloc;

      /* Reset the UTF-8 encoding state, keep the URI encoding, and reset
         the content encoding.  */
      iri->utf8_encode = opt.enable_iri;
      set_content_encoding (iri, NULL);
      xfree_null (iri->orig_url);
      iri->orig_url = NULL;

      /* Now, see if this new location makes sense.  */
      newloc_parsed = url_parse (mynewloc, &up_error_code, iri, true);
      if (!newloc_parsed)
        {
          char *error = url_error (mynewloc, up_error_code);
          logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
                     error);
          if (orig_parsed != u)
            {
              url_free (u);
            }
          xfree (url);
          xfree (mynewloc);
          xfree (error);
          RESTORE_POST_DATA;
          goto bail;
        }

      /* Now mynewloc will become newloc_parsed->url, because if the
         Location contained relative paths like .././something, we don't
         want that propagating as url.  */
      xfree (mynewloc);
      mynewloc = xstrdup (newloc_parsed->url);

      /* Check for the maximum number of redirections.  */
      if (++redirection_count > opt.max_redirect)
        {
          logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
                     opt.max_redirect);
          url_free (newloc_parsed);
          if (orig_parsed != u)
            {
              url_free (u);
            }
          xfree (url);
          xfree (mynewloc);
          RESTORE_POST_DATA;
          result = WRONGCODE;
          goto bail;
        }

      xfree (url);
      url = mynewloc;
      if (orig_parsed != u)
        {
          url_free (u);
        }
      u = newloc_parsed;

      /* If we're being redirected from POST, and we received a redirect
         code different than 307, we don't want to POST again.  Many
         requests answer POST with a redirection to an index page; that
         redirection is clearly a GET.  We "suspend" POST data for the
         duration of the redirections, and restore it when we're done.

         RFC 2616 (HTTP/1.1) introduces code 307 Temporary Redirect
         specifically to preserve the method of the request.  */
      if (result != NEWLOCATION_KEEP_POST && !post_data_suspended)
        SUSPEND_POST_DATA;

      goto redirected;
    }

  /* Try not to encode in UTF-8 if fetching failed.  */
  if (!(*dt & RETROKF) && iri->utf8_encode)
    {
      iri->utf8_encode = false;
      if (orig_parsed != u)
        {
          url_free (u);
        }
      u = url_parse (origurl, NULL, iri, true);
      if (u)
        {
          DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
          url = xstrdup (u->url);
          iri_fallbacked = true;
          goto redirected;
        }
      else
        DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url)));
    }

  if (local_file && u && *dt & RETROKF)
    {
      register_download (u->url, local_file);

      if (!opt.spider && redirection_count && 0 != strcmp (origurl, u->url))
        register_redirection (origurl, u->url);

      if (*dt & TEXTHTML)
        register_html (local_file);

      if (*dt & TEXTCSS)
        register_css (local_file);
    }

  if (file)
    *file = local_file ? local_file : NULL;
  else
    xfree_null (local_file);

  if (orig_parsed != u)
    {
      url_free (u);
    }

  if (redirection_count || iri_fallbacked)
    {
      if (newloc)
        *newloc = url;
      else
        xfree (url);
    }
  else
    {
      if (newloc)
        *newloc = NULL;
      xfree (url);
    }

  RESTORE_POST_DATA;

 bail:
  if (register_status)
    inform_exit_status (result);
  return result;
}
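/* For reference only -- SUSPEND_POST_DATA and RESTORE_POST_DATA are not
   defined in this excerpt.  They stash the POST body away while redirects
   are followed and put it back afterwards.  A sketch of what such macros
   could look like, assuming opt.post_data and opt.post_file_name hold the
   request body (the exact option fields are an assumption here):

     #define SUSPEND_POST_DATA do {                      \
       post_data_suspended = true;                       \
       saved_post_data = opt.post_data;                  \
       saved_post_file_name = opt.post_file_name;        \
       opt.post_data = NULL;                             \
       opt.post_file_name = NULL;                        \
     } while (0)

     #define RESTORE_POST_DATA do {                      \
       if (post_data_suspended)                          \
         {                                               \
           opt.post_data = saved_post_data;              \
           opt.post_file_name = saved_post_file_name;    \
           post_data_suspended = false;                  \
         }                                               \
     } while (0)
*/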
struct urlpos *
get_urls_file (const char *file)
{
  struct file_memory *fm;
  struct urlpos *head, *tail;
  const char *text, *text_end;

  /* Load the file.  */
  fm = wget_read_file (file);
  if (!fm)
    {
      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
      return NULL;
    }
  DEBUGP (("Loaded %s (size %s).\n", file,
           number_to_static_string (fm->length)));

  head = tail = NULL;
  text = fm->content;
  text_end = fm->content + fm->length;
  while (text < text_end)
    {
      int up_error_code;
      char *url_text;
      struct urlpos *entry;
      struct url *url;

      const char *line_beg = text;
      const char *line_end = memchr (text, '\n', text_end - text);
      if (!line_end)
        line_end = text_end;
      else
        ++line_end;
      text = line_end;

      /* Strip whitespace from the beginning and end of line.  */
      while (line_beg < line_end && c_isspace (*line_beg))
        ++line_beg;
      while (line_end > line_beg && c_isspace (*(line_end - 1)))
        --line_end;

      if (line_beg == line_end)
        continue;

      /* The URL is in the [line_beg, line_end) region.  */

      /* We must copy the URL to a zero-terminated string, and we can't
         use alloca because we're in a loop.  *sigh*.  */
      url_text = strdupdelim (line_beg, line_end);

      if (opt.base_href)
        {
          /* Merge opt.base_href with URL.  */
          char *merged = uri_merge (opt.base_href, url_text);
          xfree (url_text);
          url_text = merged;
        }

      char *new_url = rewrite_shorthand_url (url_text);
      if (new_url)
        {
          xfree (url_text);
          url_text = new_url;
        }

      url = url_parse (url_text, &up_error_code, NULL, false);
      if (!url)
        {
          char *error = url_error (url_text, up_error_code);
          logprintf (LOG_NOTQUIET, _("%s: Invalid URL %s: %s\n"),
                     file, url_text, error);
          xfree (url_text);
          xfree (error);
          inform_exit_status (URLERROR);
          continue;
        }
      xfree (url_text);

      entry = xnew0 (struct urlpos);
      entry->url = url;

      if (!head)
        head = entry;
      else
        tail->next = entry;
      tail = entry;
    }
  wget_read_file_free (fm);
  return head;
}
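/* Illustration only, not part of the original source: a sketch of how the
   urlpos list returned by get_urls_file() might be consumed, roughly what
   happens for `wget -i FILE'.  free_urlpos() is assumed to be the matching
   cleanup helper, and the retrieve_url() arguments are placeholders chosen
   for demonstration.  */
#if 0
static void
example_retrieve_url_list (const char *input_file)
{
  struct urlpos *list = get_urls_file (input_file);
  struct urlpos *cur;

  for (cur = list; cur; cur = cur->next)
    {
      char *filename = NULL, *redirected = NULL;
      int dt = 0;
      struct iri *iri = iri_new ();

      set_uri_encoding (iri, opt.locale, true);

      /* cur->url is the already-parsed struct url; its canonical string
         form (cur->url->url) doubles as the "original URL" argument.  */
      retrieve_url (cur->url, cur->url->url, &filename, &redirected,
                    NULL, &dt, false, iri, true);

      xfree_null (filename);
      xfree_null (redirected);
      iri_free (iri);
    }

  free_urlpos (list);
}
#endif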