/* Read URLs from FILE and retrieve each one in turn.  When HTML is
   true, FILE is parsed as HTML and its links are extracted; otherwise
   it is read as a flat list of URLs.  *COUNT (out) receives the number
   of list entries processed.  Returns the status of the last retrieval
   attempted, or QUOTEXC when the download quota was exhausted
   mid-list.  */
uerr_t retrieve_from_file (const char *file, bool html, int *count)
{
  uerr_t status;
  struct urlpos *url_list, *cur_url;

  url_list = (html ? get_urls_html (file, NULL, NULL)
              : get_urls_file (file));
  status = RETROK;             /* Suppose everything is OK.  */
  *count = 0;                  /* Reset the URL count.  */

  for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
    {
      char *filename = NULL, *new_file = NULL;
      int dt;

      if (cur_url->ignore_when_downloading)
        continue;

      /* Stop early once the byte quota has been exceeded.  */
      if (opt.quota && total_downloaded_bytes > opt.quota)
        {
          status = QUOTEXC;
          break;
        }

      /* Recursive/page-requisite retrieval goes through retrieve_tree;
         FTP URLs only do so when fetched through a proxy (i.e. over
         HTTP), otherwise they fall through to a plain retrieve_url.  */
      if ((opt.recursive || opt.page_requisites)
          && (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
        {
          int old_follow_ftp = opt.follow_ftp;

          /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
          if (cur_url->url->scheme == SCHEME_FTP)
            opt.follow_ftp = 1;

          status = retrieve_tree (cur_url->url->url);
          opt.follow_ftp = old_follow_ftp;
        }
      else
        status = retrieve_url (cur_url->url->url, &filename, &new_file,
                               NULL, &dt, opt.recursive);

      /* Honor --delete-after: remove the file we just fetched and drop
         the "retrieved OK" flag so it is not registered downstream.  */
      if (filename && opt.delete_after && file_exists_p (filename))
        {
          DEBUGP (("\
Removing file due to --delete-after in retrieve_from_file():\n"));
          logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
          if (unlink (filename))
            logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
          dt &= ~RETROKF;
        }

      xfree_null (new_file);
      xfree_null (filename);
    }
/* NOTE(review): this snippet appears truncated here -- the function's
   closing brace and its tail (presumably freeing url_list and returning
   `status') are not visible in this chunk.  Verify against the full file. */
std::string ReplayControlerClientHDL::getStatusString(){ std::string s; jderobot::ReplayerStatus value = getproxy()->getStatus(); switch (value){ case jderobot::WAITING: s="Waiting"; break; case jderobot::PLAYING: s="Playing"; break; case jderobot::PAUSED: s="Paused"; break; case jderobot::FINISHED: s="Finished"; break; } return s; }
/* Read URLs from FILE (a local path or itself a URL) and retrieve each
   one.  When HTML is true the input is parsed as HTML, otherwise as a
   flat URL list.  If FILE has a valid URL scheme it is first downloaded
   and the fetched copy is used as input.  *COUNT (out) receives the
   number of list entries processed.  IRI state (locale/encoding) is set
   up here and duplicated per retrieved URL.  */
uerr_t retrieve_from_file (const char *file, bool html, int *count)
{
  uerr_t status;
  struct urlpos *url_list, *cur_url;
  struct iri *iri = iri_new();
  char *input_file, *url_file = NULL;
  const char *url = file;

  status = RETROK;             /* Suppose everything is OK.  */
  *count = 0;                  /* Reset the URL count.  */

  /* sXXXav : Assume filename and links in the file are in the locale */
  set_uri_encoding (iri, opt.locale, true);
  set_content_encoding (iri, opt.locale);

  /* If the "file" is really a URL, download it first and use the local
     copy as the actual input file.  */
  if (url_valid_scheme (url))
    {
      int dt,url_err;
      uerr_t status;   /* shadows the outer `status' deliberately */
      struct url *url_parsed = url_parse (url, &url_err, iri, true);

      if (!url_parsed)
        {
          char *error = url_error (url, url_err);
          logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error);
          xfree (error);
          return URLERROR;
        }

      if (!opt.base_href)
        opt.base_href = xstrdup (url);

      status = retrieve_url (url_parsed, url, &url_file, NULL, NULL, &dt,
                             false, iri, true);
      url_free (url_parsed);

      /* NOTE(review): this early return appears to leak `iri' (and
         `url_file' when non-NULL but status != RETROK) -- confirm
         whether cleanup is expected here.  */
      if (!url_file || (status != RETROK))
        return status;

      if (dt & TEXTHTML)
        html = true;

      /* If we found a content encoding, use it.
       * ( == is okay, because we're checking for the identical object) */
      if (iri->content_encoding != opt.locale)
         set_uri_encoding (iri, iri->content_encoding, false);

      /* Reset UTF-8 encode status */
      iri->utf8_encode = opt.enable_iri;
      xfree_null (iri->orig_url);
      iri->orig_url = NULL;

      input_file = url_file;
    }
  else
    input_file = (char *) file;

  url_list = (html ? get_urls_html (input_file, NULL, NULL, iri)
              : get_urls_file (input_file));

  xfree_null (url_file);

  for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
    {
      char *filename = NULL, *new_file = NULL;
      int dt;
      /* Each entry gets its own copy of the IRI state so that
         per-URL encoding changes don't bleed into the next entry.  */
      struct iri *tmpiri = iri_dup (iri);
      struct url *parsed_url = NULL;

      if (cur_url->ignore_when_downloading)
        continue;

      /* Stop early once the byte quota has been exceeded.  */
      if (opt.quota && total_downloaded_bytes > opt.quota)
        {
          status = QUOTEXC;
          break;
        }

      parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true);

      /* Recursive/page-requisite retrieval goes through retrieve_tree;
         FTP URLs only do so when fetched through a proxy, otherwise
         they fall through to a plain retrieve_url.  */
      if ((opt.recursive || opt.page_requisites)
          && (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
        {
          int old_follow_ftp = opt.follow_ftp;

          /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
          if (cur_url->url->scheme == SCHEME_FTP)
            opt.follow_ftp = 1;

          status = retrieve_tree (parsed_url ? parsed_url : cur_url->url,
                                  tmpiri);
          opt.follow_ftp = old_follow_ftp;
        }
      else
        status = retrieve_url (parsed_url ? parsed_url : cur_url->url,
                               cur_url->url->url, &filename, &new_file,
                               NULL, &dt, opt.recursive, tmpiri, true);

      if (parsed_url)
          url_free (parsed_url);

      /* Honor --delete-after: remove the fetched file and drop the
         "retrieved OK" flag so it is not registered downstream.  */
      if (filename && opt.delete_after && file_exists_p (filename))
        {
          DEBUGP (("\
Removing file due to --delete-after in retrieve_from_file():\n"));
          logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
          if (unlink (filename))
            logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
          dt &= ~RETROKF;
        }

      xfree_null (new_file);
      xfree_null (filename);
      iri_free (tmpiri);
    }
/* NOTE(review): snippet appears truncated here -- the function's closing
   brace and tail (freeing `iri'/url_list, returning `status') are not
   visible in this chunk.  Verify against the full file. */
/* Retrieve the given URL.  Dispatches to http_loop or ftp_loop
   (possibly via a proxy), follows redirections up to opt.max_redirect,
   suspends POST data across non-307 redirects, and falls back to
   non-UTF-8 IRI encoding when a fetch fails.

   ORIG_PARSED   pre-parsed URL (ownership stays with the caller; any
                 replacement `u' created here is freed internally).
   ORIGURL       the original URL string.
   FILE (out)    local file name the document was saved under, or NULL.
   NEWLOC (out)  final location after redirects/IRI fallback, or NULL.
   REFURL        referer; defaults to opt.referer when NULL.
   DT (in/out)   document-type/result flag bits; local storage is used
                 when NULL.
   RECURSIVE     whether this fetch is part of a recursive retrieval.
   IRI           encoding state, mutated across redirects/fallback.
   REGISTER_STATUS  when true, the result is reported via
                 inform_exit_status before returning.  */
uerr_t retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
              char **newloc, const char *refurl, int *dt, bool recursive,
              struct iri *iri, bool register_status)
{
  uerr_t result;
  char *url;
  bool location_changed;
  bool iri_fallbacked = 0;
  int dummy;
  char *mynewloc, *proxy;
  struct url *u = orig_parsed, *proxy_url;
  int up_error_code;            /* url parse error code */
  char *local_file;
  int redirection_count = 0;

  /* State for the SUSPEND_POST_DATA / RESTORE_POST_DATA macros.  */
  bool post_data_suspended = false;
  char *saved_post_data = NULL;
  char *saved_post_file_name = NULL;

  /* If dt is NULL, use local storage.  */
  if (!dt)
    {
      dt = &dummy;
      dummy = 0;
    }
  url = xstrdup (origurl);
  if (newloc)
    *newloc = NULL;
  if (file)
    *file = NULL;

  if (!refurl)
    refurl = opt.referer;

 redirected:
  /* (also for IRI fallbacking) */

  result = NOCONERROR;
  mynewloc = NULL;
  local_file = NULL;
  proxy_url = NULL;

  proxy = getproxy (u);
  if (proxy)
    {
      /* NOTE(review): `pi' is created and configured but never passed to
         url_parse below nor freed -- looks like a leak (or url_parse was
         meant to receive `pi' instead of NULL).  Confirm upstream.  */
      struct iri *pi = iri_new ();
      set_uri_encoding (pi, opt.locale, true);
      pi->utf8_encode = false;

      /* Parse the proxy URL.  */
      proxy_url = url_parse (proxy, &up_error_code, NULL, true);
      if (!proxy_url)
        {
          char *error = url_error (proxy, up_error_code);
          logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
                     proxy, error);
          xfree (url);
          xfree (error);
          RESTORE_POST_DATA;
          result = PROXERR;
          goto bail;
        }
      /* Only HTTP proxies are supported (or a proxy speaking the same
         scheme as the target).  */
      if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
        {
          logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"),
                     proxy);
          url_free (proxy_url);
          xfree (url);
          RESTORE_POST_DATA;
          result = PROXERR;
          goto bail;
        }
    }

  if (u->scheme == SCHEME_HTTP
#ifdef HAVE_SSL
      || u->scheme == SCHEME_HTTPS
#endif
      || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
    {
      result = http_loop (u, orig_parsed, &mynewloc, &local_file, refurl, dt,
                          proxy_url, iri);
    }
  else if (u->scheme == SCHEME_FTP)
    {
      /* If this is a redirection, temporarily turn off opt.ftp_glob
         and opt.recursive, both being undesirable when following
         redirects.  */
      bool oldrec = recursive, glob = opt.ftp_glob;
      if (redirection_count)
        oldrec = glob = false;

      result = ftp_loop (u, &local_file, dt, proxy_url, recursive, glob);
      recursive = oldrec;

      /* There is a possibility of having HTTP being redirected to
         FTP.  In these cases we must decide whether the text is HTML
         according to the suffix.  The HTML suffixes are `.html',
         `.htm' and a few others, case-insensitive.  */
      if (redirection_count && local_file && u->scheme == SCHEME_FTP)
        {
          if (has_html_suffix_p (local_file))
            *dt |= TEXTHTML;
        }
    }

  if (proxy_url)
    {
      url_free (proxy_url);
      proxy_url = NULL;
    }

  location_changed = (result == NEWLOCATION || result == NEWLOCATION_KEEP_POST);
  if (location_changed)
    {
      char *construced_newloc;
      struct url *newloc_parsed;

      assert (mynewloc != NULL);

      if (local_file)
        xfree (local_file);

      /* The HTTP specs only allow absolute URLs to appear in
         redirects, but a ton of boneheaded webservers and CGIs out
         there break the rules and use relative URLs, and popular
         browsers are lenient about this, so wget should be too. */
      construced_newloc = uri_merge (url, mynewloc);
      xfree (mynewloc);
      mynewloc = construced_newloc;

      /* Reset UTF-8 encoding state, keep the URI encoding and reset
         the content encoding. */
      iri->utf8_encode = opt.enable_iri;
      set_content_encoding (iri, NULL);
      xfree_null (iri->orig_url);
      iri->orig_url = NULL;

      /* Now, see if this new location makes sense. */
      newloc_parsed = url_parse (mynewloc, &up_error_code, iri, true);
      if (!newloc_parsed)
        {
          char *error = url_error (mynewloc, up_error_code);
          logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
                     error);
          if (orig_parsed != u)
            {
              url_free (u);
            }
          xfree (url);
          xfree (mynewloc);
          xfree (error);
          RESTORE_POST_DATA;
          goto bail;
        }

      /* Now mynewloc will become newloc_parsed->url, because if the
         Location contained relative paths like .././something, we
         don't want that propagating as url.  */
      xfree (mynewloc);
      mynewloc = xstrdup (newloc_parsed->url);

      /* Check for max. number of redirections.  */
      if (++redirection_count > opt.max_redirect)
        {
          logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
                     opt.max_redirect);
          url_free (newloc_parsed);
          if (orig_parsed != u)
            {
              url_free (u);
            }
          xfree (url);
          xfree (mynewloc);
          RESTORE_POST_DATA;
          result = WRONGCODE;
          goto bail;
        }

      xfree (url);
      url = mynewloc;
      /* Only free `u' when it is one we allocated ourselves on a
         previous iteration -- never the caller's orig_parsed.  */
      if (orig_parsed != u)
        {
          url_free (u);
        }
      u = newloc_parsed;

      /* If we're being redirected from POST, and we received a
         redirect code different than 307, we don't want to POST
         again.  Many requests answer POST with a redirection to an
         index page; that redirection is clearly a GET.  We "suspend"
         POST data for the duration of the redirections, and restore
         it when we're done.

         RFC2616 HTTP/1.1 introduces code 307 Temporary Redirect
         specifically to preserve the method of the request.  */
      if (result != NEWLOCATION_KEEP_POST && !post_data_suspended)
        SUSPEND_POST_DATA;

      goto redirected;
    }

  /* Try to not encode in UTF-8 if fetching failed */
  if (!(*dt & RETROKF) && iri->utf8_encode)
    {
      iri->utf8_encode = false;
      if (orig_parsed != u)
        {
          url_free (u);
        }
      u = url_parse (origurl, NULL, iri, true);
      if (u)
        {
          DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
          url = xstrdup (u->url);
          iri_fallbacked = 1;
          goto redirected;
        }
      else
          DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url)));
    }

  /* Register the successfully downloaded file and its type for the
     recursive machinery.  */
  if (local_file && u && *dt & RETROKF)
    {
      register_download (u->url, local_file);
      if (!opt.spider && redirection_count && 0 != strcmp (origurl, u->url))
        register_redirection (origurl, u->url);
      if (*dt & TEXTHTML)
        register_html (local_file);
      if (*dt & TEXTCSS)
        register_css (local_file);
    }

  if (file)
    *file = local_file ? local_file : NULL;
  else
    xfree_null (local_file);

  if (orig_parsed != u)
    {
      url_free (u);
    }

  /* Hand the final location to the caller only when it actually
     changed (redirect or IRI fallback).  */
  if (redirection_count || iri_fallbacked)
    {
      if (newloc)
        *newloc = url;
      else
        xfree (url);
    }
  else
    {
      if (newloc)
        *newloc = NULL;
      xfree (url);
    }

  RESTORE_POST_DATA;

bail:
  if (register_status)
    inform_exit_status (result);
  return result;
}
/* Retrieve the given URL.  Dispatches to http_loop or ftp_loop
   (possibly via an HTTP proxy), follows redirections up to
   opt.max_redirect, and suspends POST data across redirects.

   ORIGURL       the URL to fetch.
   FILE (out)    local file name the document was saved under, or NULL.
   NEWLOC (out)  final location after redirects, or NULL.
   REFURL        referer; defaults to opt.referer when NULL.
   DT (in/out)   document-type/result flag bits; local storage is used
                 when NULL.
   RECURSIVE     whether this fetch is part of a recursive retrieval.  */
uerr_t retrieve_url (const char *origurl, char **file, char **newloc,
              const char *refurl, int *dt, bool recursive)
{
  uerr_t result;
  char *url;
  bool location_changed;
  int dummy;
  char *mynewloc, *proxy;
  struct url *u, *proxy_url;
  int up_error_code;            /* url parse error code */
  char *local_file;
  int redirection_count = 0;

  /* State for the SUSPEND_POST_DATA / RESTORE_POST_DATA macros.  */
  bool post_data_suspended = false;
  char *saved_post_data = NULL;
  char *saved_post_file_name = NULL;

  /* If dt is NULL, use local storage.  */
  if (!dt)
    {
      dt = &dummy;
      dummy = 0;
    }
  url = xstrdup (origurl);
  if (newloc)
    *newloc = NULL;
  if (file)
    *file = NULL;

  u = url_parse (url, &up_error_code);
  if (!u)
    {
      logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
      xfree (url);
      return URLERROR;
    }

  if (!refurl)
    refurl = opt.referer;

 redirected:
  result = NOCONERROR;
  mynewloc = NULL;
  local_file = NULL;
  proxy_url = NULL;

  proxy = getproxy (u);
  if (proxy)
    {
      /* Parse the proxy URL.  */
      proxy_url = url_parse (proxy, &up_error_code);
      if (!proxy_url)
        {
          logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
                     proxy, url_error (up_error_code));
          xfree (url);
          RESTORE_POST_DATA;
          return PROXERR;
        }
      /* Only HTTP proxies are supported (or a proxy speaking the same
         scheme as the target).  */
      if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
        {
          logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"),
                     proxy);
          url_free (proxy_url);
          xfree (url);
          RESTORE_POST_DATA;
          return PROXERR;
        }
    }

  if (u->scheme == SCHEME_HTTP
#ifdef HAVE_SSL
      || u->scheme == SCHEME_HTTPS
#endif
      || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
    {
      result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
    }
  else if (u->scheme == SCHEME_FTP)
    {
      /* If this is a redirection, temporarily turn off opt.ftp_glob
         and opt.recursive, both being undesirable when following
         redirects.  */
      bool oldrec = recursive, glob = opt.ftp_glob;
      if (redirection_count)
        oldrec = glob = false;

      result = ftp_loop (u, dt, proxy_url, recursive, glob);
      recursive = oldrec;

      /* There is a possibility of having HTTP being redirected to
         FTP.  In these cases we must decide whether the text is HTML
         according to the suffix.  The HTML suffixes are `.html',
         `.htm' and a few others, case-insensitive.  */
      if (redirection_count && local_file && u->scheme == SCHEME_FTP)
        {
          if (has_html_suffix_p (local_file))
            *dt |= TEXTHTML;
        }
    }

  if (proxy_url)
    {
      url_free (proxy_url);
      proxy_url = NULL;
    }

  location_changed = (result == NEWLOCATION);
  if (location_changed)
    {
      char *construced_newloc;
      struct url *newloc_parsed;

      assert (mynewloc != NULL);

      if (local_file)
        xfree (local_file);

      /* The HTTP specs only allow absolute URLs to appear in
         redirects, but a ton of boneheaded webservers and CGIs out
         there break the rules and use relative URLs, and popular
         browsers are lenient about this, so wget should be too. */
      construced_newloc = uri_merge (url, mynewloc);
      xfree (mynewloc);
      mynewloc = construced_newloc;

      /* Now, see if this new location makes sense. */
      newloc_parsed = url_parse (mynewloc, &up_error_code);
      if (!newloc_parsed)
        {
          logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
                     url_error (up_error_code));
          url_free (u);
          xfree (url);
          xfree (mynewloc);
          RESTORE_POST_DATA;
          return result;
        }

      /* Now mynewloc will become newloc_parsed->url, because if the
         Location contained relative paths like .././something, we
         don't want that propagating as url.  */
      xfree (mynewloc);
      mynewloc = xstrdup (newloc_parsed->url);

      /* Check for max. number of redirections.  */
      if (++redirection_count > opt.max_redirect)
        {
          logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
                     opt.max_redirect);
          url_free (newloc_parsed);
          url_free (u);
          xfree (url);
          xfree (mynewloc);
          RESTORE_POST_DATA;
          return WRONGCODE;
        }

      xfree (url);
      url = mynewloc;
      url_free (u);
      u = newloc_parsed;

      /* If we're being redirected from POST, we don't want to POST
         again.  Many requests answer POST with a redirection to an
         index page; that redirection is clearly a GET.  We "suspend"
         POST data for the duration of the redirections, and restore
         it when we're done.  */
      if (!post_data_suspended)
        SUSPEND_POST_DATA;

      goto redirected;
    }

  /* Register the successfully downloaded file and its type for the
     recursive machinery.  */
  if (local_file)
    {
      if (*dt & RETROKF)
        {
          register_download (u->url, local_file);
          if (redirection_count && 0 != strcmp (origurl, u->url))
            register_redirection (origurl, u->url);
          if (*dt & TEXTHTML)
            register_html (u->url, local_file);
        }
    }

  if (file)
    *file = local_file ? local_file : NULL;
  else
    xfree_null (local_file);

  url_free (u);

  /* Hand the final location to the caller only when it actually
     changed (i.e. at least one redirect was followed).  */
  if (redirection_count)
    {
      if (newloc)
        *newloc = url;
      else
        xfree (url);
    }
  else
    {
      if (newloc)
        *newloc = NULL;
      xfree (url);
    }

  RESTORE_POST_DATA;

  return result;
}
/* Worker-thread body: performs one request via libcurl.  Configures the
   easy handle from http->request, runs curl_easy_perform (content is
   collected by contentcallback into http->data), records response code,
   last-modified time and success flag on HTTP, then signals completion
   under http->requestmutex.  Always returns 0. */
static int requestthread(http_t *http)
{
    request_t *req = http->request;

    http->curl = curl_easy_init();
    if (!http->curl)
        abort();

    curl_slist *headers = NULL;
    string proxy;

    /* Abort the whole request on the first failing curl_easy_setopt();
       the do/while(0) wrapper makes the macro statement-safe inside
       unbraced if-bodies. */
#define setopt(OPT, VAL) \
    do \
    { \
        if (curl_easy_setopt(http->curl, OPT, VAL) != CURLE_OK) \
        { \
            conoutf_r("curl_easy_setopt() failed: %s:%d", __FILE__, __LINE__); \
            http->responsecode = QUERY_ABORTED; \
            goto error; \
        } \
    } while (0)

    setopt(CURLOPT_CONNECTTIMEOUT, req->connecttimeout);
    setopt(CURLOPT_TIMEOUT, req->timeout);
    setopt(CURLOPT_URL, req->request.str());
    setopt(CURLOPT_WRITEFUNCTION, &contentcallback);
    setopt(CURLOPT_WRITEDATA, http);
    setopt(CURLOPT_FOLLOWLOCATION, 1L); // follow redirects
    setopt(CURLOPT_FILETIME, 1L);
    setopt(CURLOPT_NOSIGNAL, 1L);
    setopt(CURLOPT_SSL_VERIFYPEER, 1L);

    /* Optional custom CA bundle; CAPATH is pointed at /dev/null so only
       the given CAINFO file is consulted. */
    if (req->cacert)
    {
        setopt(CURLOPT_CAPATH, "/dev/null");
        setopt(CURLOPT_CAINFO, req->cacert.str());
    }
    if (req->statuscallback)
    {
        setopt(CURLOPT_NOPROGRESS, 0L);
        setopt(CURLOPT_PROGRESSDATA, http);
        setopt(CURLOPT_PROGRESSFUNCTION, &downloadstatuscallback);
    }
    if (!req->headers.empty())
    {
        loopv(req->headers) headers = curl_slist_append(headers, req->headers[i].header);
        setopt(CURLOPT_HTTPHEADER, headers);
    }
    if (req->referer)
    {
        setopt(CURLOPT_REFERER, req->referer.str());
        setopt(CURLOPT_AUTOREFERER, 1L); // send referer
    }
    if (req->useragent)
        setopt(CURLOPT_USERAGENT, req->useragent.str());

    /* NOTE(review): sizeof(proxy) assumes `string' is the engine's
       fixed-size char-array typedef, not std::string -- confirm. */
    getproxy(proxy, sizeof(proxy));
    if (*proxy)
        setopt(CURLOPT_PROXY, proxy);

    if (http->speedlimit)
        setopt(CURLOPT_MAX_RECV_SPEED_LARGE, (curl_off_t)http->speedlimit);

    // try to avoid re-allocs
    if (req->expecteddatalen)
        http->data.reserve(req->expecteddatalen);

    http->requestresult = curl_easy_perform(http->curl);

    if (headers)
        curl_slist_free_all(headers);

    http->requestok = (http->requestresult == CURLE_OK || http->requestresult == CURLE_WRITE_ERROR); // happens if contentcallback returns 0 - but count it as ok

    /* Map libcurl result codes onto the engine's response codes; for
       anything else, use the HTTP status code reported by the server. */
    switch (http->requestresult)
    {
        case CURLE_WRITE_ERROR:
            http->responsecode = QUERY_ABORTED;
            break;
        case CURLE_SSL_CONNECT_ERROR:
        case CURLE_SSL_ISSUER_ERROR:
        case CURLE_SSL_CRL_BADFILE:
        case CURLE_SSL_CACERT:
            http->responsecode = SSL_ERROR;
            break;
        case CURLE_OPERATION_TIMEDOUT:
            http->responsecode = TIMEDOUT;
            break;
        default:
            if (curl_easy_getinfo(http->curl, CURLINFO_HTTP_CODE, &http->responsecode) != CURLE_OK)
                http->responsecode = INVALID_RESPONSE_CODE;
    }

    if (req->nullterminatorneeded)
        http->data.add('\0');
    http->data.resize(); // save memory

    long lastmodified;
    if (curl_easy_getinfo(http->curl, CURLINFO_FILETIME, &lastmodified) == CURLE_OK && lastmodified > 0)
        http->lastmodified = lastmodified;

error:;
    curl_easy_cleanup(http->curl);
    http->curl = NULL;

    /* Announce completion to the thread polling http->processed. */
    {
        SDL_Mutex_Locker m(http->requestmutex);
        http->processed = true;
    }
    return 0;
}
/* Retrieve the given URL.  Decides which loop to call -- HTTP, FTP,
   or simply copy it with file:// (#### the latter not yet
   implemented!).  Follows `Location' redirections by looping back to
   the `again' label with the new URL.

   ORIGURL       the URL to fetch.
   FILE (out)    local file name the document was saved under, or NULL.
   NEWLOC (out)  final location after redirects (callee-allocated).
   REFURL        referer, or NULL.
   DT (in/out)   document-type flag bits; ignored when NULL.  */
uerr_t retrieve_url (const char *origurl, char **file, char **newloc,
              const char *refurl, int *dt)
{
  uerr_t result;
  char *url;
  int location_changed, already_redirected, dummy;
  int local_use_proxy;
  char *mynewloc, *proxy;
  struct urlinfo *u;

  /* If dt is NULL, just ignore it.  */
  if (!dt)
    dt = &dummy;
  url = xstrdup (origurl);
  if (newloc)
    *newloc = NULL;
  if (file)
    *file = NULL;
  already_redirected = 0;

 again:
  u = newurl ();
  /* Parse the URL.  RFC2068 requires `Location' to contain an
     absoluteURI, but many sites break this requirement.  #### We
     should be liberal and accept a relative location, too.  */
  result = parseurl (url, u, already_redirected);
  if (result != URLOK)
    {
      freeurl (u, 1);
      logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
      return result;
    }

  /* Set the referer.  */
  if (refurl)
    u->referer = xstrdup (refurl);
  else
    u->referer = NULL;

  local_use_proxy = USE_PROXY_P (u);
  if (local_use_proxy)
    {
      /* When going through a proxy, `u' is re-pointed at the proxy's
         URL and the real target is hung off u->proxy.  */
      struct urlinfo *pu = newurl ();

      /* Copy the original URL to new location.  */
      memcpy (pu, u, sizeof (*u));
      pu->proxy = NULL; /* A minor correction :) */
      /* Initialize u to nil.  */
      memset (u, 0, sizeof (*u));
      u->proxy = pu;
      /* Get the appropriate proxy server, appropriate for the
         current protocol.  */
      proxy = getproxy (pu->proto);
      if (!proxy)
        {
          logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
          freeurl (u, 1);
          return PROXERR;
        }
      /* Parse the proxy URL.  Only HTTP proxies are supported.  */
      result = parseurl (proxy, u, 0);
      if (result != URLOK || u->proto != URLHTTP)
        {
          if (u->proto == URLHTTP)
            logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg (result));
          else
            logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
          freeurl (u, 1);
          return PROXERR;
        }
      u->proto = URLHTTP;
    }

  assert (u->proto != URLFILE);	/* #### Implement me!  */
  mynewloc = NULL;

  if (u->proto == URLHTTP)
    result = http_loop (u, &mynewloc, dt);
  else if (u->proto == URLFTP)
    {
      /* If this is a redirection, we must not allow recursive FTP
         retrieval, so we save recursion to oldrec, and restore it
         later.  */
      int oldrec = opt.recursive;
      if (already_redirected)
        opt.recursive = 0;
      result = ftp_loop (u, dt);
      opt.recursive = oldrec;

      /* There is a possibility of having HTTP being redirected to
         FTP.  In these cases we must decide whether the text is HTML
         according to the suffix.  The HTML suffixes are `.html' and
         `.htm', case-insensitive.

         #### All of this is, of course, crap.  These types should be
         determined through mailcap.  */
      if (already_redirected && u->local && (u->proto == URLFTP ))
        {
          char *suf = suffix (u->local);
          if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
            *dt |= TEXTHTML;
          FREE_MAYBE (suf);
        }
    }

  location_changed = (result == NEWLOCATION);
  if (location_changed)
    {
      /* Check for redirection to oneself.  */
      if (url_equal (url, mynewloc))
        {
          logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"),
                     mynewloc);
          /* NOTE(review): this return path does not freeurl(u) nor
             free url/mynewloc -- looks like a leak; confirm upstream. */
          return WRONGCODE;
        }
      if (mynewloc)
        {
          free (url);
          url = mynewloc;
        }
      freeurl (u, 1);
      already_redirected = 1;
      goto again;
    }

  if (file)
    {
      if (u->local)
        *file = xstrdup (u->local);
      else
        *file = NULL;
    }
  freeurl (u, 1);

  if (newloc)
    *newloc = url;
  else
    free (url);

  return result;
}