static bool
run_wgetrc (const char *file)
{
  FILE *fp;
  char *line;
  int ln;
  int errcnt = 0;

  fp = fopen (file, "rb");
  if (!fp)
    {
      fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
               file, strerror (errno));
      return true;                      /* not a fatal error */
    }
  enable_tilde_expansion = true;
  ln = 1;
  while ((line = read_whole_line (fp)) != NULL)
    {
      char *com = NULL, *val = NULL;
      int comind;

      /* Parse the line.  */
      switch (parse_line (line, &com, &val, &comind))
        {
        case line_ok:
          /* If everything is OK, set the value.  */
          if (!setval_internal (comind, com, val))
            {
              fprintf (stderr, _("%s: Error in %s at line %d.\n"),
                       exec_name, file, ln);
              ++errcnt;
            }
          break;
        case line_syntax_error:
          fprintf (stderr, _("%s: Syntax error in %s at line %d.\n"),
                   exec_name, file, ln);
          ++errcnt;
          break;
        case line_unknown_command:
          fprintf (stderr, _("%s: Unknown command `%s' in %s at line %d.\n"),
                   exec_name, com, file, ln);
          ++errcnt;
          break;
        case line_empty:
          break;
        default:
          abort ();
        }
      xfree_null (com);
      xfree_null (val);
      xfree (line);
      ++ln;
    }
  enable_tilde_expansion = false;
  fclose (fp);

  return errcnt == 0;
}
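A minimal usage sketch of this function, assuming the wgetrc_file_name() helper shown later in this section; the error message and variable names here are illustrative, not from the sources:

/* Hypothetical caller: locate the user's startup file and run it.
   wgetrc_file_name() is defined further below.  */
char *rcfile = wgetrc_file_name ();
if (rcfile)
  {
    if (!run_wgetrc (rcfile))
      fprintf (stderr, _("%s: Errors found in %s.\n"), exec_name, rcfile);
    xfree (rcfile);
  }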
/* Updates the console title with the URL of the current file being
   transferred.  */
void
ws_changetitle (const char *url)
{
  xfree_null (title_buf);
  xfree_null (curr_url);
  title_buf = xmalloc (strlen (url) + 20);
  curr_url = xstrdup (url);
  old_percentage = -1;
  sprintf (title_buf, "Wget %s", curr_url);
  SetConsoleTitle (title_buf);
}
void cleanup_opengl_features(void)
{
	xfree_null(GL_vendor);
	xfree_null(GL_version);
	xfree_null(GL_renderer);
	xfree_null(GL_glsl_version);
	if (GL_extensions_dict != NULL) {
		strdict_destroy(GL_extensions_dict);
		GL_extensions_dict = NULL;
	}
}
uerr_t
retrieve_from_file (const char *file, bool html, int *count)
{
  uerr_t status;
  struct urlpos *url_list, *cur_url;

  url_list = (html ? get_urls_html (file, NULL, NULL)
              : get_urls_file (file));
  status = RETROK;             /* Suppose everything is OK.  */
  *count = 0;                  /* Reset the URL count.  */

  for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
    {
      char *filename = NULL, *new_file = NULL;
      int dt;

      if (cur_url->ignore_when_downloading)
        continue;

      if (opt.quota && total_downloaded_bytes > opt.quota)
        {
          status = QUOTEXC;
          break;
        }
      if ((opt.recursive || opt.page_requisites)
          && (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
        {
          int old_follow_ftp = opt.follow_ftp;

          /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
          if (cur_url->url->scheme == SCHEME_FTP)
            opt.follow_ftp = 1;

          status = retrieve_tree (cur_url->url->url);

          opt.follow_ftp = old_follow_ftp;
        }
      else
        status = retrieve_url (cur_url->url->url, &filename, &new_file,
                               NULL, &dt, opt.recursive);

      if (filename && opt.delete_after && file_exists_p (filename))
        {
          DEBUGP (("\
Removing file due to --delete-after in retrieve_from_file():\n"));
          logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
          if (unlink (filename))
            logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
          dt &= ~RETROKF;
        }

      xfree_null (new_file);
      xfree_null (filename);
    }

  /* Free the linked list of URL-s.  */
  free_urlpos (url_list);

  return status;
}
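A hedged usage sketch of how an --input-file style caller might drive this function; the file name and variables are illustrative, not from the sources:

/* Illustrative invocation: download every URL listed in urls.txt,
   treating the input as plain text rather than HTML.  */
int url_count = 0;
uerr_t status = retrieve_from_file ("urls.txt", false, &url_count);
DEBUGP (("Processed %d URLs from input file.\n", url_count));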
/* Return the path to the user's .wgetrc.  This is either the value of
   the `WGETRC' environment variable, or `$HOME/.wgetrc'.

   If the `WGETRC' variable exists but the file does not exist, the
   function will exit().  */
static char *
wgetrc_file_name (void)
{
  char *env, *home;
  char *file = NULL;

  /* Try the environment.  */
  env = getenv ("WGETRC");
  if (env && *env)
    {
      if (!file_exists_p (env))
        {
          fprintf (stderr, _("%s: WGETRC points to %s, which doesn't exist.\n"),
                   exec_name, env);
          exit (1);
        }
      return xstrdup (env);
    }

  /* If that failed, try $HOME/.wgetrc.  */
  home = home_dir ();
  if (home)
    file = aprintf ("%s/.wgetrc", home);
  xfree_null (home);

#ifdef WINDOWS
  /* Under Windows, if we still haven't found .wgetrc, look for the
     file `wget.ini' in the directory where `wget.exe' resides; we do
     this for backward compatibility with previous versions of Wget.
     SYSTEM_WGETRC should not be defined under WINDOWS.  */
  if (!file || !file_exists_p (file))
    {
      xfree_null (file);
      file = NULL;
      home = ws_mypath ();
      if (home)
        file = aprintf ("%s/wget.ini", home);
    }
#endif /* WINDOWS */

  if (!file)
    return NULL;
  if (!file_exists_p (file))
    {
      xfree (file);
      return NULL;
    }
  return file;
}
/* Return the path to the user's .wgetrc.  This is either the value of
   the `WGETRC' environment variable, or `$HOME/.wgetrc'.  Additionally,
   on Windows, look in the directory where wget.exe resides.  */
char *
wgetrc_file_name (void)
{
  char *file = wgetrc_env_file_name ();
  if (file && *file)
    return file;

  file = wgetrc_user_file_name ();

#ifdef WINDOWS
  /* Under Windows, if we still haven't found .wgetrc, look for the
     file `wget.ini' in the directory where `wget.exe' resides; we do
     this for backward compatibility with previous versions of Wget.
     SYSTEM_WGETRC should not be defined under WINDOWS.  */
  if (!file)
    {
      char *home = ws_mypath ();
      if (home)
        {
          file = aprintf ("%s/wget.ini", home);
          if (!file_exists_p (file))
            {
              xfree (file);
              file = NULL;
            }
          xfree (home);
        }
    }
#endif /* WINDOWS */

  return file;
}
/* Check for the existence of `$HOME/.wgetrc' and return its path
   if the file exists.  */
char *
wgetrc_user_file_name (void)
{
  char *home;
  char *file = NULL;

  /* Try $HOME/.wgetrc (or equivalent).  */
#ifdef __VMS
  file = xstrdup ("SYS$LOGIN:.wgetrc");
#else /* def __VMS */
  home = home_dir ();
  if (home)
    file = aprintf ("%s/.wgetrc", home);
  xfree_null (home);
#endif /* def __VMS [else] */

  if (!file)
    return NULL;
  if (!file_exists_p (file))
    {
      xfree (file);
      return NULL;
    }
  return file;
}
void
log_cleanup (void)
{
  size_t i;
  for (i = 0; i < countof (ring); i++)
    xfree_null (ring[i].buffer);
}
static void
free_specs (struct robot_specs *specs)
{
  int i;
  for (i = 0; i < specs->count; i++)
    xfree (specs->paths[i].path);
  xfree_null (specs->paths);
  xfree (specs);
}
struct urlpos *
get_urls_html (const char *file, const char *url, bool *meta_disallow_follow,
               struct iri *iri)
{
  struct file_memory *fm;
  struct map_context ctx;
  int flags;

  /* Load the file.  */
  fm = wget_read_file (file);
  if (!fm)
    {
      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
      return NULL;
    }
  DEBUGP (("Loaded %s (size %s).\n", file,
           number_to_static_string (fm->length)));

  ctx.text = fm->content;
  ctx.head = NULL;
  ctx.base = NULL;
  ctx.parent_base = url ? url : opt.base_href;
  ctx.document_file = file;
  ctx.nofollow = false;

  if (!interesting_tags)
    init_interesting ();

  /* Specify MHT_TRIM_VALUES because of buggy HTML generators that
     generate <a href=" foo"> instead of <a href="foo"> (browsers
     ignore spaces as well).  If you really mean space, use &#32; or
     %20.  MHT_TRIM_VALUES also causes squashing of embedded newlines,
     e.g. in <img src="foo.[newline]html">.  Such newlines are also
     ignored by IE and Mozilla and are presumably introduced by
     writing HTML with editors that force word wrap.  */
  flags = MHT_TRIM_VALUES;
  if (opt.strict_comments)
    flags |= MHT_STRICT_COMMENTS;

  /* the NULL here used to be interesting_tags */
  map_html_tags (fm->content, fm->length, collect_tags_mapper, &ctx, flags,
                 NULL, interesting_attributes);

  /* If a META charset was found, override the content encoding.  */
  if (iri && meta_charset)
    set_content_encoding (iri, meta_charset);

  DEBUGP (("no-follow in %s: %d\n", file, ctx.nofollow));
  if (meta_disallow_follow)
    *meta_disallow_follow = ctx.nofollow;

  xfree_null (ctx.base);
  wget_read_file_free (fm);
  return ctx.head;
}
static void
openssl_close (int fd, void *arg)
{
  struct openssl_transport_context *ctx = arg;
  SSL *conn = ctx->conn;

  SSL_shutdown (conn);
  SSL_free (conn);
  xfree_null (ctx->last_error);
  xfree (ctx);

  close (fd);

  DEBUGP (("Closed %d/SSL 0x%0*lx\n", fd, PTR_FORMAT (conn)));
}
static const char *
openssl_errstr (int fd, void *arg)
{
  struct openssl_transport_context *ctx = arg;
  unsigned long errcode;
  char *errmsg = NULL;
  int msglen = 0;

  /* If there are no SSL-specific errors, just return NULL.  */
  if ((errcode = ERR_get_error ()) == 0)
    return NULL;

  /* Get rid of the previous contents of ctx->last_error, if any.  */
  xfree_null (ctx->last_error);

  /* Iterate over OpenSSL's error stack and accumulate errors in the
     last_error buffer, separated by "; ".  This is better than using
     a static buffer, which *always* takes up space (and has to be
     large, to fit more than one error message), whereas these
     allocations are only performed when there is an actual error.  */

  for (;;)
    {
      const char *str = ERR_error_string (errcode, NULL);
      int len = strlen (str);

      /* Allocate space for the existing message, plus two more chars
         for the "; " separator and one for the terminating \0.  */
      errmsg = xrealloc (errmsg, msglen + len + 2 + 1);
      memcpy (errmsg + msglen, str, len);
      msglen += len;

      /* Get the next error and bail out if there are no more.  */
      errcode = ERR_get_error ();
      if (errcode == 0)
        break;

      errmsg[msglen++] = ';';
      errmsg[msglen++] = ' ';
    }
  errmsg[msglen] = '\0';

  /* Store the error in ctx->last_error, where openssl_close will
     eventually find it and free it.  */
  ctx->last_error = errmsg;

  return errmsg;
}
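These errstr/close callbacks are plugged into the descriptor layer as a transport. The sketch below shows that wiring under the assumption that the companion openssl_read, openssl_write, openssl_poll, and openssl_peek callbacks and the fd_register_transport() API exist as in the surrounding codebase:

/* Sketch: register the SSL callbacks as the transport for a
   connected socket FD.  The struct layout and registration call are
   assumptions about the descriptor layer, not shown in this section.  */
static struct transport_implementation openssl_transport = {
  openssl_read, openssl_write, openssl_poll,
  openssl_peek, openssl_errstr, openssl_close
};

/* ... after a successful SSL handshake on FD: */
fd_register_transport (fd, &openssl_transport, ctx);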
static void
openssl_close (int fd, void *arg)
{
  struct openssl_transport_context *ctx = arg;
  SSL *conn = ctx->conn;

  SSL_shutdown (conn);
  SSL_free (conn);
  xfree_null (ctx->last_error);
  xfree (ctx);

#if defined(WINDOWS) || defined(MSDOS)
  closesocket (fd);
#else
  close (fd);
#endif

  DEBUGP (("Closed %d/SSL 0x%0*lx\n", fd, PTR_FORMAT (conn)));
}
uerr_t
retrieve_from_file (const char *file, bool html, int *count)
{
  uerr_t status;
  struct urlpos *url_list, *cur_url;
  struct iri *iri = iri_new ();

  char *input_file, *url_file = NULL;
  const char *url = file;

  status = RETROK;             /* Suppose everything is OK.  */
  *count = 0;                  /* Reset the URL count.  */

  /* sXXXav : Assume the file name and the links in the file are in
     the locale.  */
  set_uri_encoding (iri, opt.locale, true);
  set_content_encoding (iri, opt.locale);

  if (url_valid_scheme (url))
    {
      int dt, url_err;
      uerr_t status;
      struct url *url_parsed = url_parse (url, &url_err, iri, true);

      if (!url_parsed)
        {
          char *error = url_error (url, url_err);
          logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error);
          xfree (error);
          return URLERROR;
        }

      if (!opt.base_href)
        opt.base_href = xstrdup (url);

      status = retrieve_url (url_parsed, url, &url_file, NULL, NULL, &dt,
                             false, iri, true);
      url_free (url_parsed);

      if (!url_file || (status != RETROK))
        return status;

      if (dt & TEXTHTML)
        html = true;

      /* If we have found a content encoding, use it.
         (== is okay, because we're checking for an identical object.)  */
      if (iri->content_encoding != opt.locale)
        set_uri_encoding (iri, iri->content_encoding, false);

      /* Reset the UTF-8 encode status.  */
      iri->utf8_encode = opt.enable_iri;
      xfree_null (iri->orig_url);
      iri->orig_url = NULL;

      input_file = url_file;
    }
  else
    input_file = (char *) file;

  url_list = (html ? get_urls_html (input_file, NULL, NULL, iri)
              : get_urls_file (input_file));

  xfree_null (url_file);

  for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
    {
      char *filename = NULL, *new_file = NULL;
      int dt;
      struct iri *tmpiri = iri_dup (iri);
      struct url *parsed_url = NULL;

      if (cur_url->ignore_when_downloading)
        continue;

      if (opt.quota && total_downloaded_bytes > opt.quota)
        {
          status = QUOTEXC;
          break;
        }

      parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true);

      if ((opt.recursive || opt.page_requisites)
          && (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
        {
          int old_follow_ftp = opt.follow_ftp;

          /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
          if (cur_url->url->scheme == SCHEME_FTP)
            opt.follow_ftp = 1;

          status = retrieve_tree (parsed_url ? parsed_url : cur_url->url,
                                  tmpiri);

          opt.follow_ftp = old_follow_ftp;
        }
      else
        status = retrieve_url (parsed_url ? parsed_url : cur_url->url,
                               cur_url->url->url, &filename, &new_file, NULL,
                               &dt, opt.recursive, tmpiri, true);

      if (parsed_url)
        url_free (parsed_url);

      if (filename && opt.delete_after && file_exists_p (filename))
        {
          DEBUGP (("\
Removing file due to --delete-after in retrieve_from_file():\n"));
          logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
          if (unlink (filename))
            logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
          dt &= ~RETROKF;
        }

      xfree_null (new_file);
      xfree_null (filename);
      iri_free (tmpiri);
    }

  /* Free the linked list of URL-s.  */
  free_urlpos (url_list);

  iri_free (iri);

  return status;
}
uerr_t
retrieve_url (struct url *orig_parsed, const char *origurl, char **file,
              char **newloc, const char *refurl, int *dt, bool recursive,
              struct iri *iri, bool register_status)
{
  uerr_t result;
  char *url;
  bool location_changed;
  bool iri_fallbacked = false;
  int dummy;
  char *mynewloc, *proxy;
  struct url *u = orig_parsed, *proxy_url;
  int up_error_code;            /* url parse error code */
  char *local_file;
  int redirection_count = 0;

  bool post_data_suspended = false;
  char *saved_post_data = NULL;
  char *saved_post_file_name = NULL;

  /* If dt is NULL, use local storage.  */
  if (!dt)
    {
      dt = &dummy;
      dummy = 0;
    }
  url = xstrdup (origurl);
  if (newloc)
    *newloc = NULL;
  if (file)
    *file = NULL;

  if (!refurl)
    refurl = opt.referer;

 redirected:
  /* (also for IRI fallbacking) */

  result = NOCONERROR;
  mynewloc = NULL;
  local_file = NULL;
  proxy_url = NULL;

  proxy = getproxy (u);
  if (proxy)
    {
      struct iri *pi = iri_new ();
      set_uri_encoding (pi, opt.locale, true);
      pi->utf8_encode = false;

      /* Parse the proxy URL.  */
      proxy_url = url_parse (proxy, &up_error_code, NULL, true);
      if (!proxy_url)
        {
          char *error = url_error (proxy, up_error_code);
          logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
                     proxy, error);
          xfree (url);
          xfree (error);
          RESTORE_POST_DATA;
          result = PROXERR;
          goto bail;
        }
      if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
        {
          logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"),
                     proxy);
          url_free (proxy_url);
          xfree (url);
          RESTORE_POST_DATA;
          result = PROXERR;
          goto bail;
        }
    }

  if (u->scheme == SCHEME_HTTP
#ifdef HAVE_SSL
      || u->scheme == SCHEME_HTTPS
#endif
      || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
    {
      result = http_loop (u, orig_parsed, &mynewloc, &local_file, refurl, dt,
                          proxy_url, iri);
    }
  else if (u->scheme == SCHEME_FTP)
    {
      /* If this is a redirection, temporarily turn off opt.ftp_glob
         and opt.recursive, both being undesirable when following
         redirects.  */
      bool oldrec = recursive, glob = opt.ftp_glob;
      if (redirection_count)
        oldrec = glob = false;

      result = ftp_loop (u, &local_file, dt, proxy_url, recursive, glob);
      recursive = oldrec;

      /* There is a possibility of having HTTP being redirected to
         FTP.  In these cases we must decide whether the text is HTML
         according to the suffix.  The HTML suffixes are `.html',
         `.htm' and a few others, case-insensitive.  */
      if (redirection_count && local_file && u->scheme == SCHEME_FTP)
        {
          if (has_html_suffix_p (local_file))
            *dt |= TEXTHTML;
        }
    }

  if (proxy_url)
    {
      url_free (proxy_url);
      proxy_url = NULL;
    }

  location_changed = (result == NEWLOCATION
                      || result == NEWLOCATION_KEEP_POST);
  if (location_changed)
    {
      char *constructed_newloc;
      struct url *newloc_parsed;

      assert (mynewloc != NULL);

      if (local_file)
        xfree (local_file);

      /* The HTTP specs only allow absolute URLs to appear in
         redirects, but a ton of boneheaded webservers and CGIs out
         there break the rules and use relative URLs, and popular
         browsers are lenient about this, so wget should be too.  */
      constructed_newloc = uri_merge (url, mynewloc);
      xfree (mynewloc);
      mynewloc = constructed_newloc;

      /* Reset the UTF-8 encoding state, keep the URI encoding, and
         reset the content encoding.  */
      iri->utf8_encode = opt.enable_iri;
      set_content_encoding (iri, NULL);
      xfree_null (iri->orig_url);
      iri->orig_url = NULL;

      /* Now, see if this new location makes sense.  */
      newloc_parsed = url_parse (mynewloc, &up_error_code, iri, true);
      if (!newloc_parsed)
        {
          char *error = url_error (mynewloc, up_error_code);
          logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
                     error);
          if (orig_parsed != u)
            url_free (u);
          xfree (url);
          xfree (mynewloc);
          xfree (error);
          RESTORE_POST_DATA;
          goto bail;
        }

      /* Now mynewloc will become newloc_parsed->url, because if the
         Location contained relative paths like .././something, we
         don't want that propagating as url.  */
      xfree (mynewloc);
      mynewloc = xstrdup (newloc_parsed->url);

      /* Check for max. number of redirections.  */
      if (++redirection_count > opt.max_redirect)
        {
          logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
                     opt.max_redirect);
          url_free (newloc_parsed);
          if (orig_parsed != u)
            url_free (u);
          xfree (url);
          xfree (mynewloc);
          RESTORE_POST_DATA;
          result = WRONGCODE;
          goto bail;
        }

      xfree (url);
      url = mynewloc;
      if (orig_parsed != u)
        url_free (u);
      u = newloc_parsed;

      /* If we're being redirected from POST, and we received a
         redirect code different than 307, we don't want to POST
         again.  Many requests answer POST with a redirection to an
         index page; that redirection is clearly a GET.  We "suspend"
         POST data for the duration of the redirections, and restore
         it when we're done.

         RFC2616 HTTP/1.1 introduces code 307 Temporary Redirect
         specifically to preserve the method of the request.  */
      if (result != NEWLOCATION_KEEP_POST && !post_data_suspended)
        SUSPEND_POST_DATA;

      goto redirected;
    }

  /* Try not to encode in UTF-8 if fetching failed.  */
  if (!(*dt & RETROKF) && iri->utf8_encode)
    {
      iri->utf8_encode = false;
      if (orig_parsed != u)
        url_free (u);
      u = url_parse (origurl, NULL, iri, true);
      if (u)
        {
          DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
          url = xstrdup (u->url);
          iri_fallbacked = true;
          goto redirected;
        }
      else
        DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url)));
    }

  if (local_file && u && (*dt & RETROKF))
    {
      register_download (u->url, local_file);

      if (!opt.spider && redirection_count && 0 != strcmp (origurl, u->url))
        register_redirection (origurl, u->url);

      if (*dt & TEXTHTML)
        register_html (local_file);

      if (*dt & TEXTCSS)
        register_css (local_file);
    }

  if (file)
    *file = local_file ? local_file : NULL;
  else
    xfree_null (local_file);

  if (orig_parsed != u)
    url_free (u);

  if (redirection_count || iri_fallbacked)
    {
      if (newloc)
        *newloc = url;
      else
        xfree (url);
    }
  else
    {
      if (newloc)
        *newloc = NULL;
      xfree (url);
    }

  RESTORE_POST_DATA;

 bail:
  if (register_status)
    inform_exit_status (result);
  return result;
}
char *
fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint,
              long maxsize)
{
  long bufsize = sizehint;
  char *hunk = xmalloc (bufsize);
  int tail = 0;                 /* tail position in HUNK */

  assert (!maxsize || maxsize >= bufsize);

  while (1)
    {
      const char *end;
      int pklen, rdlen, remain;

      /* First, peek at the available data.  */
      pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1);
      if (pklen < 0)
        {
          xfree (hunk);
          return NULL;
        }
      end = terminator (hunk, hunk + tail, pklen);
      if (end)
        {
          /* The data contains the terminator: we'll drain the data up
             to the end of the terminator.  */
          remain = end - (hunk + tail);
          assert (remain >= 0);
          if (remain == 0)
            {
              /* No more data needs to be read.  */
              hunk[tail] = '\0';
              return hunk;
            }
          if (bufsize - 1 < tail + remain)
            {
              bufsize = tail + remain + 1;
              hunk = xrealloc (hunk, bufsize);
            }
        }
      else
        /* No terminator: simply read the data we know is (or should
           be) available.  */
        remain = pklen;

      /* Now, read the data.  Note that we make no assumptions about
         how much data we'll get.  (Some TCP stacks are notorious for
         read returning less data than the previous MSG_PEEK.)  */
      rdlen = fd_read (fd, hunk + tail, remain, 0);
      if (rdlen < 0)
        {
          xfree_null (hunk);
          return NULL;
        }
      tail += rdlen;
      hunk[tail] = '\0';

      if (rdlen == 0)
        {
          if (tail == 0)
            {
              /* EOF without anything having been read.  */
              xfree (hunk);
              errno = 0;
              return NULL;
            }
          else
            /* EOF seen: return the data we've read.  */
            return hunk;
        }
      if (end && rdlen == remain)
        /* The terminator was seen and the remaining data drained --
           we got what we came for.  */
        return hunk;

      /* Keep looping until all the data arrives.  */

      if (tail == bufsize - 1)
        {
          /* Double the buffer size, but refuse to allocate more than
             MAXSIZE bytes.  */
          if (maxsize && bufsize >= maxsize)
            {
              xfree (hunk);
              errno = ENOMEM;
              return NULL;
            }
          bufsize <<= 1;
          if (maxsize && bufsize > maxsize)
            bufsize = maxsize;
          hunk = xrealloc (hunk, bufsize);
        }
    }
}
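fd_read_hunk() delegates end-of-hunk detection to the hunk_terminator_t callback, invoked as terminator (hunk, hunk + tail, pklen). A minimal sketch of such a callback, modeled on newline-terminated line reads; this particular helper is illustrative:

#include <string.h>

/* Illustrative hunk_terminator_t: return a pointer just past the
   first '\n' in the freshly peeked data, or NULL if no complete line
   has arrived yet.  START is the beginning of the hunk, PEEKED the
   start of the new data, PEEKLEN its length.  */
static const char *
line_terminator (const char *start, const char *peeked, int peeklen)
{
  const char *p = memchr (peeked, '\n', peeklen);
  if (p)
    return p + 1;               /* include the newline in the hunk */
  return NULL;
}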
/* Convert the Un*x-ish style directory listing stored in FILE to a
   linked list of fileinfo (system-independent) entries.  The contents
   of FILE are considered to be produced by the standard Unix
   `ls -la' output (whatever that might be).  BSD (no group) and SYSV
   (with group) listings are handled.

   The time stamps are stored in a separate variable, time_t
   compatible (I hope).  The timezones are ignored.  */
static struct fileinfo *
ftp_parse_unix_ls (const char *file, int ignore_perms)
{
  FILE *fp;
  static const char *months[] = {
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  };
  int next, len, i, error, ignore;
  int year, month, day;         /* for time analysis */
  int hour, min, sec, ptype;
  struct tm timestruct, *tnow;
  time_t timenow;
  size_t bufsize = 0;

  char *line = NULL, *tok, *ptok;      /* tokenizer */
  struct fileinfo *dir, *l, cur;       /* list creation */

  fp = fopen (file, "rb");
  if (!fp)
    {
      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
      return NULL;
    }
  dir = l = NULL;

  /* Line loop to end of file: */
  while ((len = getline (&line, &bufsize, fp)) > 0)
    {
      len = clean_line (line, len);
      /* Skip if total...  */
      if (!strncasecmp (line, "total", 5))
        continue;

      /* Get the first token (permissions).  */
      tok = strtok (line, " ");
      if (!tok)
        continue;

      cur.name = NULL;
      cur.linkto = NULL;

      /* Decide whether we deal with a file or a directory.  */
      switch (*tok)
        {
        case '-':
          cur.type = FT_PLAINFILE;
          DEBUGP (("PLAINFILE; "));
          break;
        case 'd':
          cur.type = FT_DIRECTORY;
          DEBUGP (("DIRECTORY; "));
          break;
        case 'l':
          cur.type = FT_SYMLINK;
          DEBUGP (("SYMLINK; "));
          break;
        default:
          cur.type = FT_UNKNOWN;
          DEBUGP (("UNKNOWN; "));
          break;
        }

      if (ignore_perms)
        {
          switch (cur.type)
            {
            case FT_PLAINFILE:
              cur.perms = 0644;
              break;
            case FT_DIRECTORY:
              cur.perms = 0755;
              break;
            default:
              /*cur.perms = 1023;*/     /* #### What is this?  --hniksic */
              cur.perms = 0644;
            }
          DEBUGP (("implicit perms %0o; ", cur.perms));
        }
      else
        {
          cur.perms = symperms (tok + 1);
          DEBUGP (("perms %0o; ", cur.perms));
        }

      error = ignore = 0;       /* Erroneous and ignored entries are
                                   treated equally for now.  */
      year = hour = min = sec = 0; /* Silence the compiler.  */
      month = day = 0;
      ptype = TT_DAY;
      next = -1;
      /* While there are tokens on the line, parse them.  Next is the
         number of tokens left until the filename.

         Use the month-name token as the "anchor" (the place where the
         position wrt the file name is "known").  When a month name is
         encountered, `next' is set to 5.  Also, the preceding
         characters are parsed to get the file size.

         This tactic is quite dubious when it comes to
         internationalization issues (non-English month names), but it
         works for now.  */
      tok = line;
      while (ptok = tok, (tok = strtok (NULL, " ")) != NULL)
        {
          --next;
          if (next < 0)         /* a month name was not encountered */
            {
              for (i = 0; i < 12; i++)
                if (!strcasecmp (tok, months[i]))
                  break;
              /* If we got a month, it means the token before it is
                 the size, and the filename is three tokens away.  */
              if (i != 12)
                {
                  wgint size;

                  /* Parse the previous token with str_to_wgint.  */
                  if (ptok == line)
                    {
                      /* Something has gone wrong during parsing.  */
                      error = 1;
                      break;
                    }
                  errno = 0;
                  size = str_to_wgint (ptok, NULL, 10);
                  if (size == WGINT_MAX && errno == ERANGE)
                    /* Out of range -- ignore the size.  #### Should
                       we refuse to start the download?  */
                    cur.size = 0;
                  else
                    cur.size = size;
                  DEBUGP (("size: %s; ",
                           number_to_static_string (cur.size)));

                  month = i;
                  next = 5;
                  DEBUGP (("month: %s; ", months[month]));
                }
            }
          else if (next == 4)   /* days */
            {
              if (tok[1])       /* two-digit... */
                day = 10 * (*tok - '0') + tok[1] - '0';
              else              /* ...or one-digit */
                day = *tok - '0';
              DEBUGP (("day: %d; ", day));
            }
          else if (next == 3)
            {
              /* This ought to be either the time, or the year.  Let's
                 be flexible!  If we have a number x, it's a year.  If
                 we have x:y, it's hours and minutes.  If we have
                 x:y:z, z are seconds.  */
              year = 0;
              min = hour = sec = 0;
              /* We must deal with digits.  */
              if (c_isdigit (*tok))
                {
                  /* Suppose it's year.  */
                  for (; c_isdigit (*tok); tok++)
                    year = (*tok - '0') + 10 * year;
                  if (*tok == ':')
                    {
                      /* This means these were hours!  */
                      hour = year;
                      year = 0;
                      ptype = TT_HOUR_MIN;
                      ++tok;
                      /* Get the minutes...  */
                      for (; c_isdigit (*tok); tok++)
                        min = (*tok - '0') + 10 * min;
                      if (*tok == ':')
                        {
                          /* ...and the seconds.  */
                          ++tok;
                          for (; c_isdigit (*tok); tok++)
                            sec = (*tok - '0') + 10 * sec;
                        }
                    }
                }
              if (year)
                DEBUGP (("year: %d (no tm); ", year));
              else
                DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
            }
          else if (next == 2)   /* The file name */
            {
              int fnlen;
              char *p;

              /* Since the file name may contain a SPC, it is possible
                 for strtok to handle it wrong.  */
              fnlen = strlen (tok);
              if (fnlen < len - (tok - line))
                {
                  /* So we have a SPC in the file name.  Restore the
                     original.  */
                  tok[fnlen] = ' ';
                  /* If the file is a symbolic link, it should have a
                     ` -> ' somewhere.  */
                  if (cur.type == FT_SYMLINK)
                    {
                      p = strstr (tok, " -> ");
                      if (!p)
                        {
                          error = 1;
                          break;
                        }
                      cur.linkto = xstrdup (p + 4);
                      DEBUGP (("link to: %s\n", cur.linkto));
                      /* And separate it from the file name.  */
                      *p = '\0';
                    }
                }
              /* If we have the filename, add it to the list of files
                 or directories.  */
              /* "." and ".." are an exception!  */
              if (!strcmp (tok, ".") || !strcmp (tok, ".."))
                {
                  DEBUGP (("\nIgnoring `.' and `..'; "));
                  ignore = 1;
                  break;
                }
              /* Some FTP sites choose to have ls -F as their default
                 LIST output, which marks the symlinks with a trailing
                 `@', directory names with a trailing `/' and
                 executables with a trailing `*'.  This is no problem
                 unless encountering a symbolic link ending with `@',
                 or an executable ending with `*' on a server without
                 default -F output.  I believe these cases are very
                 rare.  */
              fnlen = strlen (tok); /* re-calculate `fnlen' */
              cur.name = xmalloc (fnlen + 1);
              memcpy (cur.name, tok, fnlen + 1);
              if (fnlen)
                {
                  if (cur.type == FT_DIRECTORY
                      && cur.name[fnlen - 1] == '/')
                    {
                      cur.name[fnlen - 1] = '\0';
                      DEBUGP (("trailing `/' on dir.\n"));
                    }
                  else if (cur.type == FT_SYMLINK
                           && cur.name[fnlen - 1] == '@')
                    {
                      cur.name[fnlen - 1] = '\0';
                      DEBUGP (("trailing `@' on link.\n"));
                    }
                  else if (cur.type == FT_PLAINFILE
                           && (cur.perms & 0111)
                           && cur.name[fnlen - 1] == '*')
                    {
                      cur.name[fnlen - 1] = '\0';
                      DEBUGP (("trailing `*' on exec.\n"));
                    }
                } /* if (fnlen) */
              else
                error = 1;
              break;
            }
          else
            abort ();
        } /* while */

      if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
        error = 1;

      DEBUGP (("%s\n", cur.name ? cur.name : ""));

      if (error || ignore)
        {
          DEBUGP (("Skipping.\n"));
          xfree_null (cur.name);
          xfree_null (cur.linkto);
          continue;
        }

      if (!dir)
        {
          l = dir = xnew (struct fileinfo);
          memcpy (l, &cur, sizeof (cur));
          l->prev = l->next = NULL;
        }
      else
        {
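For reference, the shape of the listing lines this parser is built for; these samples are illustrative, not taken from any particular server:

-rw-r--r--   1 ftp      ftp         31232 Mar 11 17:32 index.html
drwxr-xr-x   2 ftp      ftp          4096 Jan  5  2004 pub
lrwxrwxrwx   1 ftp      ftp            11 Jun  9 08:15 latest -> pub/current

The month name ("Mar", "Jan", "Jun") serves as the anchor described in the comments above: the token before it is the size, and the file name is the third token after it.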
uerr_t
retrieve_url (const char *origurl, char **file, char **newloc,
              const char *refurl, int *dt, bool recursive)
{
  uerr_t result;
  char *url;
  bool location_changed;
  int dummy;
  char *mynewloc, *proxy;
  struct url *u, *proxy_url;
  int up_error_code;            /* url parse error code */
  char *local_file;
  int redirection_count = 0;

  bool post_data_suspended = false;
  char *saved_post_data = NULL;
  char *saved_post_file_name = NULL;

  /* If dt is NULL, use local storage.  */
  if (!dt)
    {
      dt = &dummy;
      dummy = 0;
    }
  url = xstrdup (origurl);
  if (newloc)
    *newloc = NULL;
  if (file)
    *file = NULL;

  u = url_parse (url, &up_error_code);
  if (!u)
    {
      logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
      xfree (url);
      return URLERROR;
    }

  if (!refurl)
    refurl = opt.referer;

 redirected:
  result = NOCONERROR;
  mynewloc = NULL;
  local_file = NULL;
  proxy_url = NULL;

  proxy = getproxy (u);
  if (proxy)
    {
      /* Parse the proxy URL.  */
      proxy_url = url_parse (proxy, &up_error_code);
      if (!proxy_url)
        {
          logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
                     proxy, url_error (up_error_code));
          xfree (url);
          RESTORE_POST_DATA;
          return PROXERR;
        }
      if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
        {
          logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"),
                     proxy);
          url_free (proxy_url);
          xfree (url);
          RESTORE_POST_DATA;
          return PROXERR;
        }
    }

  if (u->scheme == SCHEME_HTTP
#ifdef HAVE_SSL
      || u->scheme == SCHEME_HTTPS
#endif
      || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
    {
      result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
    }
  else if (u->scheme == SCHEME_FTP)
    {
      /* If this is a redirection, temporarily turn off opt.ftp_glob
         and opt.recursive, both being undesirable when following
         redirects.  */
      bool oldrec = recursive, glob = opt.ftp_glob;
      if (redirection_count)
        oldrec = glob = false;

      result = ftp_loop (u, dt, proxy_url, recursive, glob);
      recursive = oldrec;

      /* There is a possibility of having HTTP being redirected to
         FTP.  In these cases we must decide whether the text is HTML
         according to the suffix.  The HTML suffixes are `.html',
         `.htm' and a few others, case-insensitive.  */
      if (redirection_count && local_file && u->scheme == SCHEME_FTP)
        {
          if (has_html_suffix_p (local_file))
            *dt |= TEXTHTML;
        }
    }

  if (proxy_url)
    {
      url_free (proxy_url);
      proxy_url = NULL;
    }

  location_changed = (result == NEWLOCATION);
  if (location_changed)
    {
      char *constructed_newloc;
      struct url *newloc_parsed;

      assert (mynewloc != NULL);

      if (local_file)
        xfree (local_file);

      /* The HTTP specs only allow absolute URLs to appear in
         redirects, but a ton of boneheaded webservers and CGIs out
         there break the rules and use relative URLs, and popular
         browsers are lenient about this, so wget should be too.  */
      constructed_newloc = uri_merge (url, mynewloc);
      xfree (mynewloc);
      mynewloc = constructed_newloc;

      /* Now, see if this new location makes sense.  */
      newloc_parsed = url_parse (mynewloc, &up_error_code);
      if (!newloc_parsed)
        {
          logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
                     url_error (up_error_code));
          url_free (u);
          xfree (url);
          xfree (mynewloc);
          RESTORE_POST_DATA;
          return result;
        }

      /* Now mynewloc will become newloc_parsed->url, because if the
         Location contained relative paths like .././something, we
         don't want that propagating as url.  */
      xfree (mynewloc);
      mynewloc = xstrdup (newloc_parsed->url);

      /* Check for max. number of redirections.  */
      if (++redirection_count > opt.max_redirect)
        {
          logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
                     opt.max_redirect);
          url_free (newloc_parsed);
          url_free (u);
          xfree (url);
          xfree (mynewloc);
          RESTORE_POST_DATA;
          return WRONGCODE;
        }

      xfree (url);
      url = mynewloc;
      url_free (u);
      u = newloc_parsed;

      /* If we're being redirected from POST, we don't want to POST
         again.  Many requests answer POST with a redirection to an
         index page; that redirection is clearly a GET.  We "suspend"
         POST data for the duration of the redirections, and restore
         it when we're done.  */
      if (!post_data_suspended)
        SUSPEND_POST_DATA;

      goto redirected;
    }

  if (local_file)
    {
      if (*dt & RETROKF)
        {
          register_download (u->url, local_file);
          if (redirection_count && 0 != strcmp (origurl, u->url))
            register_redirection (origurl, u->url);
          if (*dt & TEXTHTML)
            register_html (u->url, local_file);
        }
    }

  if (file)
    *file = local_file ? local_file : NULL;
  else
    xfree_null (local_file);

  url_free (u);

  if (redirection_count)
    {
      if (newloc)
        *newloc = url;
      else
        xfree (url);
    }
  else
    {
      if (newloc)
        *newloc = NULL;
      xfree (url);
    }

  RESTORE_POST_DATA;

  return result;
}
static void
tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx)
{
  char *name = find_attr (tag, "name", NULL);
  char *http_equiv = find_attr (tag, "http-equiv", NULL);

  if (http_equiv && 0 == strcasecmp (http_equiv, "refresh"))
    {
      /* Some pages use a META tag to specify that the page be
         refreshed by a new page after a given number of seconds.  The
         general format for this is:

           <meta http-equiv=Refresh content="NUMBER; URL=index2.html">

         So we just need to skip past the "NUMBER; URL=" garbage to
         get to the URL.  */

      struct urlpos *entry;
      int attrind;
      int timeout = 0;
      char *p;

      char *refresh = find_attr (tag, "content", &attrind);
      if (!refresh)
        return;

      for (p = refresh; c_isdigit (*p); p++)
        timeout = 10 * timeout + *p - '0';
      if (*p++ != ';')
        return;

      while (c_isspace (*p))
        ++p;
      if (!(c_toupper (*p) == 'U'
            && c_toupper (*(p + 1)) == 'R'
            && c_toupper (*(p + 2)) == 'L'
            && *(p + 3) == '='))
        return;
      p += 4;
      while (c_isspace (*p))
        ++p;

      entry = append_url (p, ATTR_POS (tag, attrind, ctx),
                          ATTR_SIZE (tag, attrind), ctx);
      if (entry)
        {
          entry->link_refresh_p = 1;
          entry->refresh_timeout = timeout;
          entry->link_expect_html = 1;
        }
    }
  else if (http_equiv && 0 == strcasecmp (http_equiv, "content-type"))
    {
      /* Handle stuff like:
         <meta http-equiv="Content-Type" content="text/html; charset=CHARSET"> */

      char *mcharset;
      char *content = find_attr (tag, "content", NULL);
      if (!content)
        return;

      mcharset = parse_charset (content);
      if (!mcharset)
        return;

      xfree_null (meta_charset);
      meta_charset = mcharset;
    }
  else if (name && 0 == strcasecmp (name, "robots"))
    {
      /* Handle stuff like:
         <meta name="robots" content="index,nofollow"> */
      char *content = find_attr (tag, "content", NULL);
      if (!content)
        return;
      if (!strcasecmp (content, "none"))
        ctx->nofollow = true;
      else
        {
          while (*content)
            {
              char *end;
              /* Skip any initial whitespace.  */
              content += strspn (content, " \f\n\r\t\v");

              /* Find the next occurrence of ',' or whitespace,
                 or the end of the string.  */
              end = content + strcspn (content, ", \f\n\r\t\v");

              if (!strncasecmp (content, "nofollow", end - content))
                ctx->nofollow = true;

              /* Skip past the next comma, if any.  */
              if (*end == ',')
                ++end;
              else
                {
                  end = strchr (end, ',');
                  if (end)
                    ++end;
                  else
                    end = content + strlen (content);
                }

              content = end;
            }
        }
    }
}
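For reference, illustrative instances of the three META forms this handler recognizes (refresh, content-type, and robots); the URLs and values are examples, not from the sources:

<meta http-equiv="Refresh" content="5; URL=next.html">
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="robots" content="index,nofollow">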