Exemple #1
0
static bool
run_wgetrc (const char *file)
{
  FILE *fp;
  char *line;
  int ln;
  int errcnt = 0;

  fp = fopen (file, "rb");
  if (!fp)
    {
      fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
               file, strerror (errno));
      return true;                      /* not a fatal error */
    }
  enable_tilde_expansion = true;
  ln = 1;
  while ((line = read_whole_line (fp)) != NULL)
    {
      char *com = NULL, *val = NULL;
      int comind;

      /* Parse the line.  */
      switch (parse_line (line, &com, &val, &comind))
        {
        case line_ok:
          /* If everything is OK, set the value.  */
          if (!setval_internal (comind, com, val))
            {
              fprintf (stderr, _("%s: Error in %s at line %d.\n"),
                       exec_name, file, ln);
              ++errcnt;
            }
          break;
        case line_syntax_error:
          fprintf (stderr, _("%s: Syntax error in %s at line %d.\n"),
                   exec_name, file, ln);
          ++errcnt;
          break;
        case line_unknown_command:
          fprintf (stderr, _("%s: Unknown command `%s' in %s at line %d.\n"),
                   exec_name, com, file, ln);
          ++errcnt;
          break;
        case line_empty:
          break;
        default:
          abort ();
        }
      xfree_null (com);
      xfree_null (val);
      xfree (line);
      ++ln;
    }
  enable_tilde_expansion = false;
  fclose (fp);

  return errcnt == 0;
}
Exemple #2
0
/* Updates the console title with the URL of the current file being
   transferred.  */
void
ws_changetitle (const char *url)
{
  xfree_null (title_buf);
  xfree_null (curr_url);
  title_buf = xmalloc (strlen (url) + 20);
  curr_url = xstrdup (url);
  old_percentage = -1;
  sprintf (title_buf, "Wget %s", curr_url);
  SetConsoleTitle (title_buf);
}
Exemple #3
0
void
cleanup_opengl_features(void)
{
	xfree_null(GL_vendor);
	xfree_null(GL_version);
	xfree_null(GL_renderer);
	xfree_null(GL_glsl_version);
	if (GL_extensions_dict != NULL) {
		strdict_destroy(GL_extensions_dict);
		GL_extensions_dict = NULL;
	}
}
Exemple #4
0
uerr_t
retrieve_from_file (const char *file, bool html, int *count)
{
  uerr_t status;
  struct urlpos *url_list, *cur_url;

  url_list = (html ? get_urls_html (file, NULL, NULL)
              : get_urls_file (file));
  status = RETROK;             /* Suppose everything is OK.  */
  *count = 0;                  /* Reset the URL count.  */

  for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
    {
      char *filename = NULL, *new_file = NULL;
      int dt;

      if (cur_url->ignore_when_downloading)
        continue;

      if (opt.quota && total_downloaded_bytes > opt.quota)
        {
          status = QUOTEXC;
          break;
        }
      if ((opt.recursive || opt.page_requisites)
          && (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
        {
          int old_follow_ftp = opt.follow_ftp;

          /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
          if (cur_url->url->scheme == SCHEME_FTP)
            opt.follow_ftp = 1;

          status = retrieve_tree (cur_url->url->url);

          opt.follow_ftp = old_follow_ftp;
        }
      else
        status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt, opt.recursive);

      if (filename && opt.delete_after && file_exists_p (filename))
        {
          DEBUGP (("\
Removing file due to --delete-after in retrieve_from_file():\n"));
          logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
          if (unlink (filename))
            logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
          dt &= ~RETROKF;
        }

      xfree_null (new_file);
      xfree_null (filename);
    }
Exemple #5
0
/* Return the path to the user's .wgetrc.  This is either the value of
   `WGETRC' environment variable, or `$HOME/.wgetrc'.

   If the `WGETRC' variable exists but the file does not exist, the
   function will exit().  */
static char *
wgetrc_file_name (void)
{
  char *env, *home;
  char *file = NULL;

  /* Try the environment.  */
  env = getenv ("WGETRC");
  if (env && *env)
    {
      if (!file_exists_p (env))
        {
          fprintf (stderr, _("%s: WGETRC points to %s, which doesn't exist.\n"),
                   exec_name, env);
          exit (1);
        }
      return xstrdup (env);
    }

  /* If that failed, try $HOME/.wgetrc.  */
  home = home_dir ();
  if (home)
    file = aprintf ("%s/.wgetrc", home);
  xfree_null (home);

#ifdef WINDOWS
  /* Under Windows, if we still haven't found .wgetrc, look for the file
     `wget.ini' in the directory where `wget.exe' resides; we do this for
     backward compatibility with previous versions of Wget.
     SYSTEM_WGETRC should not be defined under WINDOWS.  */
  if (!file || !file_exists_p (file))
    {
      xfree_null (file);
      file = NULL;
      home = ws_mypath ();
      if (home)
        file = aprintf ("%s/wget.ini", home);
    }
#endif /* WINDOWS */

  if (!file)
    return NULL;
  if (!file_exists_p (file))
    {
      xfree (file);
      return NULL;
    }
  return file;
}
Exemple #6
0
/* Return the path to the user's .wgetrc.  This is either the value of
   `WGETRC' environment variable, or `$HOME/.wgetrc'.

   Additionally, for windows, look in the directory where wget.exe
   resides.  */
char *
wgetrc_file_name (void)
{
  char *file = wgetrc_env_file_name ();
  if (file && *file)
    return file;

  file = wgetrc_user_file_name ();

#ifdef WINDOWS
  /* Under Windows, if we still haven't found .wgetrc, look for the file
     `wget.ini' in the directory where `wget.exe' resides; we do this for
     backward compatibility with previous versions of Wget.
     SYSTEM_WGETRC should not be defined under WINDOWS.  */
  if (!file)
    {
      char *home = home_dir ();
      xfree_null (file);
      file = NULL;
      home = ws_mypath ();
      if (home)
        {
          file = aprintf ("%s/wget.ini", home);
          if (!file_exists_p (file))
            {
              xfree (file);
              file = NULL;
            }
          xfree (home);
        }
    }
#endif /* WINDOWS */

  return file;
}
Exemple #7
0
/* Check for the existance of '$HOME/.wgetrc' and return it's path
   if it exists and is set.  */
char *
wgetrc_user_file_name (void)
{
  char *home;
  char *file = NULL;
  /* If that failed, try $HOME/.wgetrc (or equivalent).  */

#ifdef __VMS
  file = "SYS$LOGIN:.wgetrc";
#else /* def __VMS */
  home = home_dir ();
  if (home)
    file = aprintf ("%s/.wgetrc", home);
  xfree_null (home);
#endif /* def __VMS [else] */

  if (!file)
    return NULL;
  if (!file_exists_p (file))
    {
      xfree (file);
      return NULL;
    }
  return file;
}
Exemple #8
0
void
log_cleanup (void)
{
  size_t i;
  for (i = 0; i < countof (ring); i++)
    xfree_null (ring[i].buffer);
}
Exemple #9
0
static void
free_specs (struct robot_specs *specs)
{
  int i;
  for (i = 0; i < specs->count; i++)
    xfree (specs->paths[i].path);
  xfree_null (specs->paths);
  xfree (specs);
}
struct urlpos *
get_urls_html (const char *file, const char *url, bool *meta_disallow_follow,
               struct iri *iri)
{
  struct file_memory *fm;
  struct map_context ctx;
  int flags;

  /* Load the file. */
  fm = wget_read_file (file);
  if (!fm)
    {
      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
      return NULL;
    }
  DEBUGP (("Loaded %s (size %s).\n", file, number_to_static_string (fm->length)));

  ctx.text = fm->content;
  ctx.head = NULL;
  ctx.base = NULL;
  ctx.parent_base = url ? url : opt.base_href;
  ctx.document_file = file;
  ctx.nofollow = false;

  if (!interesting_tags)
    init_interesting ();

  /* Specify MHT_TRIM_VALUES because of buggy HTML generators that
     generate <a href=" foo"> instead of <a href="foo"> (browsers
     ignore spaces as well.)  If you really mean space, use &32; or
     %20.  MHT_TRIM_VALUES also causes squashing of embedded newlines,
     e.g. in <img src="foo.[newline]html">.  Such newlines are also
     ignored by IE and Mozilla and are presumably introduced by
     writing HTML with editors that force word wrap.  */
  flags = MHT_TRIM_VALUES;
  if (opt.strict_comments)
    flags |= MHT_STRICT_COMMENTS;

  /* the NULL here used to be interesting_tags */
  map_html_tags (fm->content, fm->length, collect_tags_mapper, &ctx, flags,
                 NULL, interesting_attributes);

  /* If meta charset isn't null, override content encoding */
  if (iri && meta_charset)
    set_content_encoding (iri, meta_charset);

  DEBUGP (("no-follow in %s: %d\n", file, ctx.nofollow));
  if (meta_disallow_follow)
    *meta_disallow_follow = ctx.nofollow;

  xfree_null (ctx.base);
  wget_read_file_free (fm);
  return ctx.head;
}
Exemple #11
0
static void
openssl_close (int fd, void *arg)
{
  struct openssl_transport_context *ctx = arg;
  SSL *conn = ctx->conn;

  SSL_shutdown (conn);
  SSL_free (conn);
  xfree_null (ctx->last_error);
  xfree (ctx);

  close (fd);

  DEBUGP (("Closed %d/SSL 0x%0*lx\n", fd, PTR_FORMAT (conn)));
}
Exemple #12
0
static const char *
openssl_errstr (int fd, void *arg)
{
  struct openssl_transport_context *ctx = arg;
  unsigned long errcode;
  char *errmsg = NULL;
  int msglen = 0;

  /* If there are no SSL-specific errors, just return NULL. */
  if ((errcode = ERR_get_error ()) == 0)
    return NULL;

  /* Get rid of previous contents of ctx->last_error, if any.  */
  xfree_null (ctx->last_error);

  /* Iterate over OpenSSL's error stack and accumulate errors in the
     last_error buffer, separated by "; ".  This is better than using
     a static buffer, which *always* takes up space (and has to be
     large, to fit more than one error message), whereas these
     allocations are only performed when there is an actual error.  */

  for (;;)
    {
      const char *str = ERR_error_string (errcode, NULL);
      int len = strlen (str);

      /* Allocate space for the existing message, plus two more chars
         for the "; " separator and one for the terminating \0.  */
      errmsg = xrealloc (errmsg, msglen + len + 2 + 1);
      memcpy (errmsg + msglen, str, len);
      msglen += len;

      /* Get next error and bail out if there are no more. */
      errcode = ERR_get_error ();
      if (errcode == 0)
        break;

      errmsg[msglen++] = ';';
      errmsg[msglen++] = ' ';
    }
  errmsg[msglen] = '\0';

  /* Store the error in ctx->last_error where openssl_close will
     eventually find it and free it.  */
  ctx->last_error = errmsg;

  return errmsg;
}
Exemple #13
0
static void
openssl_close (int fd, void *arg)
{
  struct openssl_transport_context *ctx = arg;
  SSL *conn = ctx->conn;

  SSL_shutdown (conn);
  SSL_free (conn);
  xfree_null (ctx->last_error);
  xfree (ctx);

#if defined(WINDOWS) || defined(MSDOS)
  closesocket (fd);
#else
  close (fd);
#endif

  DEBUGP (("Closed %d/SSL 0x%0*lx\n", fd, PTR_FORMAT (conn)));
}
Exemple #14
0
uerr_t
retrieve_from_file (const char *file, bool html, int *count)
{
  uerr_t status;
  struct urlpos *url_list, *cur_url;
  struct iri *iri = iri_new();

  char *input_file, *url_file = NULL;
  const char *url = file;

  status = RETROK;             /* Suppose everything is OK.  */
  *count = 0;                  /* Reset the URL count.  */

  /* sXXXav : Assume filename and links in the file are in the locale */
  set_uri_encoding (iri, opt.locale, true);
  set_content_encoding (iri, opt.locale);

  if (url_valid_scheme (url))
    {
      int dt,url_err;
      uerr_t status;
      struct url *url_parsed = url_parse (url, &url_err, iri, true);
      if (!url_parsed)
        {
          char *error = url_error (url, url_err);
          logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error);
          xfree (error);
          return URLERROR;
        }

      if (!opt.base_href)
        opt.base_href = xstrdup (url);

      status = retrieve_url (url_parsed, url, &url_file, NULL, NULL, &dt,
                             false, iri, true);
      url_free (url_parsed);

      if (!url_file || (status != RETROK))
        return status;

      if (dt & TEXTHTML)
        html = true;

      /* If we have a found a content encoding, use it.
       * ( == is okay, because we're checking for identical object) */
      if (iri->content_encoding != opt.locale)
	  set_uri_encoding (iri, iri->content_encoding, false);

      /* Reset UTF-8 encode status */
      iri->utf8_encode = opt.enable_iri;
      xfree_null (iri->orig_url);
      iri->orig_url = NULL;

      input_file = url_file;
    }
  else
    input_file = (char *) file;

  url_list = (html ? get_urls_html (input_file, NULL, NULL, iri)
              : get_urls_file (input_file));

  xfree_null (url_file);

  for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
    {
      char *filename = NULL, *new_file = NULL;
      int dt;
      struct iri *tmpiri = iri_dup (iri);
      struct url *parsed_url = NULL;

      if (cur_url->ignore_when_downloading)
        continue;

      if (opt.quota && total_downloaded_bytes > opt.quota)
        {
          status = QUOTEXC;
          break;
        }

      parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true);

      if ((opt.recursive || opt.page_requisites)
          && (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
        {
          int old_follow_ftp = opt.follow_ftp;

          /* Turn opt.follow_ftp on in case of recursive FTP retrieval */
          if (cur_url->url->scheme == SCHEME_FTP)
            opt.follow_ftp = 1;

          status = retrieve_tree (parsed_url ? parsed_url : cur_url->url,
                                  tmpiri);

          opt.follow_ftp = old_follow_ftp;
        }
      else
        status = retrieve_url (parsed_url ? parsed_url : cur_url->url,
                               cur_url->url->url, &filename,
                               &new_file, NULL, &dt, opt.recursive, tmpiri,
                               true);

      if (parsed_url)
          url_free (parsed_url);

      if (filename && opt.delete_after && file_exists_p (filename))
        {
          DEBUGP (("\
Removing file due to --delete-after in retrieve_from_file():\n"));
          logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
          if (unlink (filename))
            logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
          dt &= ~RETROKF;
        }

      xfree_null (new_file);
      xfree_null (filename);
      iri_free (tmpiri);
    }
Exemple #15
0
uerr_t
retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
              char **newloc, const char *refurl, int *dt, bool recursive,
              struct iri *iri, bool register_status)
{
  uerr_t result;
  char *url;
  bool location_changed;
  bool iri_fallbacked = 0;
  int dummy;
  char *mynewloc, *proxy;
  struct url *u = orig_parsed, *proxy_url;
  int up_error_code;            /* url parse error code */
  char *local_file;
  int redirection_count = 0;

  bool post_data_suspended = false;
  char *saved_post_data = NULL;
  char *saved_post_file_name = NULL;

  /* If dt is NULL, use local storage.  */
  if (!dt)
    {
      dt = &dummy;
      dummy = 0;
    }
  url = xstrdup (origurl);
  if (newloc)
    *newloc = NULL;
  if (file)
    *file = NULL;

  if (!refurl)
    refurl = opt.referer;

 redirected:
  /* (also for IRI fallbacking) */

  result = NOCONERROR;
  mynewloc = NULL;
  local_file = NULL;
  proxy_url = NULL;

  proxy = getproxy (u);
  if (proxy)
    {
      struct iri *pi = iri_new ();
      set_uri_encoding (pi, opt.locale, true);
      pi->utf8_encode = false;

      /* Parse the proxy URL.  */
      proxy_url = url_parse (proxy, &up_error_code, NULL, true);
      if (!proxy_url)
        {
          char *error = url_error (proxy, up_error_code);
          logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
                     proxy, error);
          xfree (url);
          xfree (error);
          RESTORE_POST_DATA;
          result = PROXERR;
          goto bail;
        }
      if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
        {
          logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
          url_free (proxy_url);
          xfree (url);
          RESTORE_POST_DATA;
          result = PROXERR;
          goto bail;
        }
    }

  if (u->scheme == SCHEME_HTTP
#ifdef HAVE_SSL
      || u->scheme == SCHEME_HTTPS
#endif
      || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
    {
      result = http_loop (u, orig_parsed, &mynewloc, &local_file, refurl, dt,
                          proxy_url, iri);
    }
  else if (u->scheme == SCHEME_FTP)
    {
      /* If this is a redirection, temporarily turn off opt.ftp_glob
         and opt.recursive, both being undesirable when following
         redirects.  */
      bool oldrec = recursive, glob = opt.ftp_glob;
      if (redirection_count)
        oldrec = glob = false;

      result = ftp_loop (u, &local_file, dt, proxy_url, recursive, glob);
      recursive = oldrec;

      /* There is a possibility of having HTTP being redirected to
         FTP.  In these cases we must decide whether the text is HTML
         according to the suffix.  The HTML suffixes are `.html',
         `.htm' and a few others, case-insensitive.  */
      if (redirection_count && local_file && u->scheme == SCHEME_FTP)
        {
          if (has_html_suffix_p (local_file))
            *dt |= TEXTHTML;
        }
    }

  if (proxy_url)
    {
      url_free (proxy_url);
      proxy_url = NULL;
    }

  location_changed = (result == NEWLOCATION || result == NEWLOCATION_KEEP_POST);
  if (location_changed)
    {
      char *construced_newloc;
      struct url *newloc_parsed;

      assert (mynewloc != NULL);

      if (local_file)
        xfree (local_file);

      /* The HTTP specs only allow absolute URLs to appear in
         redirects, but a ton of boneheaded webservers and CGIs out
         there break the rules and use relative URLs, and popular
         browsers are lenient about this, so wget should be too. */
      construced_newloc = uri_merge (url, mynewloc);
      xfree (mynewloc);
      mynewloc = construced_newloc;

      /* Reset UTF-8 encoding state, keep the URI encoding and reset
         the content encoding. */
      iri->utf8_encode = opt.enable_iri;
      set_content_encoding (iri, NULL);
      xfree_null (iri->orig_url);
      iri->orig_url = NULL;

      /* Now, see if this new location makes sense. */
      newloc_parsed = url_parse (mynewloc, &up_error_code, iri, true);
      if (!newloc_parsed)
        {
          char *error = url_error (mynewloc, up_error_code);
          logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
                     error);
          if (orig_parsed != u)
            {
              url_free (u);
            }
          xfree (url);
          xfree (mynewloc);
          xfree (error);
          RESTORE_POST_DATA;
          goto bail;
        }

      /* Now mynewloc will become newloc_parsed->url, because if the
         Location contained relative paths like .././something, we
         don't want that propagating as url.  */
      xfree (mynewloc);
      mynewloc = xstrdup (newloc_parsed->url);

      /* Check for max. number of redirections.  */
      if (++redirection_count > opt.max_redirect)
        {
          logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
                     opt.max_redirect);
          url_free (newloc_parsed);
          if (orig_parsed != u)
            {
              url_free (u);
            }
          xfree (url);
          xfree (mynewloc);
          RESTORE_POST_DATA;
          result = WRONGCODE;
          goto bail;
        }

      xfree (url);
      url = mynewloc;
      if (orig_parsed != u)
        {
          url_free (u);
        }
      u = newloc_parsed;

      /* If we're being redirected from POST, and we received a
         redirect code different than 307, we don't want to POST
         again.  Many requests answer POST with a redirection to an
         index page; that redirection is clearly a GET.  We "suspend"
         POST data for the duration of the redirections, and restore
         it when we're done.
	 
	 RFC2616 HTTP/1.1 introduces code 307 Temporary Redirect
	 specifically to preserve the method of the request.
	 */
      if (result != NEWLOCATION_KEEP_POST && !post_data_suspended)
        SUSPEND_POST_DATA;

      goto redirected;
    }

  /* Try to not encode in UTF-8 if fetching failed */
  if (!(*dt & RETROKF) && iri->utf8_encode)
    {
      iri->utf8_encode = false;
      if (orig_parsed != u)
        {
          url_free (u);
        }
      u = url_parse (origurl, NULL, iri, true);
      if (u)
        {
          DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
          url = xstrdup (u->url);
          iri_fallbacked = 1;
          goto redirected;
        }
      else
          DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url)));
    }

  if (local_file && u && *dt & RETROKF)
    {
      register_download (u->url, local_file);

      if (!opt.spider && redirection_count && 0 != strcmp (origurl, u->url))
        register_redirection (origurl, u->url);

      if (*dt & TEXTHTML)
        register_html (local_file);

      if (*dt & TEXTCSS)
        register_css (local_file);
    }

  if (file)
    *file = local_file ? local_file : NULL;
  else
    xfree_null (local_file);

  if (orig_parsed != u)
    {
      url_free (u);
    }

  if (redirection_count || iri_fallbacked)
    {
      if (newloc)
        *newloc = url;
      else
        xfree (url);
    }
  else
    {
      if (newloc)
        *newloc = NULL;
      xfree (url);
    }

  RESTORE_POST_DATA;

bail:
  if (register_status)
    inform_exit_status (result);
  return result;
}
Exemple #16
0
char *
fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint, long maxsize)
{
  long bufsize = sizehint;
  char *hunk = xmalloc (bufsize);
  int tail = 0;                 /* tail position in HUNK */

  assert (!maxsize || maxsize >= bufsize);

  while (1)
    {
      const char *end;
      int pklen, rdlen, remain;

      /* First, peek at the available data. */

      pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1);
      if (pklen < 0)
        {
          xfree (hunk);
          return NULL;
        }
      end = terminator (hunk, hunk + tail, pklen);
      if (end)
        {
          /* The data contains the terminator: we'll drain the data up
             to the end of the terminator.  */
          remain = end - (hunk + tail);
          assert (remain >= 0);
          if (remain == 0)
            {
              /* No more data needs to be read. */
              hunk[tail] = '\0';
              return hunk;
            }
          if (bufsize - 1 < tail + remain)
            {
              bufsize = tail + remain + 1;
              hunk = xrealloc (hunk, bufsize);
            }
        }
      else
        /* No terminator: simply read the data we know is (or should
           be) available.  */
        remain = pklen;

      /* Now, read the data.  Note that we make no assumptions about
         how much data we'll get.  (Some TCP stacks are notorious for
         read returning less data than the previous MSG_PEEK.)  */

      rdlen = fd_read (fd, hunk + tail, remain, 0);
      if (rdlen < 0)
        {
          xfree_null (hunk);
          return NULL;
        }
      tail += rdlen;
      hunk[tail] = '\0';

      if (rdlen == 0)
        {
          if (tail == 0)
            {
              /* EOF without anything having been read */
              xfree (hunk);
              errno = 0;
              return NULL;
            }
          else
            /* EOF seen: return the data we've read. */
            return hunk;
        }
      if (end && rdlen == remain)
        /* The terminator was seen and the remaining data drained --
           we got what we came for.  */
        return hunk;

      /* Keep looping until all the data arrives. */

      if (tail == bufsize - 1)
        {
          /* Double the buffer size, but refuse to allocate more than
             MAXSIZE bytes.  */
          if (maxsize && bufsize >= maxsize)
            {
              xfree (hunk);
              errno = ENOMEM;
              return NULL;
            }
          bufsize <<= 1;
          if (maxsize && bufsize > maxsize)
            bufsize = maxsize;
          hunk = xrealloc (hunk, bufsize);
        }
    }
}
Exemple #17
0
/* Convert the Un*x-ish style directory listing stored in FILE to a
   linked list of fileinfo (system-independent) entries.  The contents
   of FILE are considered to be produced by the standard Unix `ls -la'
   output (whatever that might be).  BSD (no group) and SYSV (with
   group) listings are handled.

   The time stamps are stored in a separate variable, time_t
   compatible (I hope).  The timezones are ignored.  */
static struct fileinfo *
ftp_parse_unix_ls (const char *file, int ignore_perms)
{
  FILE *fp;
  static const char *months[] = {
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  };
  int next, len, i, error, ignore;
  int year, month, day;         /* for time analysis */
  int hour, min, sec, ptype;
  struct tm timestruct, *tnow;
  time_t timenow;
  size_t bufsize = 0;

  char *line = NULL, *tok, *ptok;      /* tokenizer */
  struct fileinfo *dir, *l, cur; /* list creation */

  fp = fopen (file, "rb");
  if (!fp)
    {
      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
      return NULL;
    }
  dir = l = NULL;

  /* Line loop to end of file: */
  while ((len = getline (&line, &bufsize, fp)) > 0)
    {
      len = clean_line (line, len);
      /* Skip if total...  */
      if (!strncasecmp (line, "total", 5))
        continue;
      /* Get the first token (permissions).  */
      tok = strtok (line, " ");
      if (!tok)
        continue;

      cur.name = NULL;
      cur.linkto = NULL;

      /* Decide whether we deal with a file or a directory.  */
      switch (*tok)
        {
        case '-':
          cur.type = FT_PLAINFILE;
          DEBUGP (("PLAINFILE; "));
          break;
        case 'd':
          cur.type = FT_DIRECTORY;
          DEBUGP (("DIRECTORY; "));
          break;
        case 'l':
          cur.type = FT_SYMLINK;
          DEBUGP (("SYMLINK; "));
          break;
        default:
          cur.type = FT_UNKNOWN;
          DEBUGP (("UNKNOWN; "));
          break;
        }

      if (ignore_perms)
        {
          switch (cur.type)
            {
            case FT_PLAINFILE:
              cur.perms = 0644;
              break;
            case FT_DIRECTORY:
              cur.perms = 0755;
              break;
            default:
              /*cur.perms = 1023;*/     /* #### What is this?  --hniksic */
              cur.perms = 0644;
            }
          DEBUGP (("implicit perms %0o; ", cur.perms));
        }
       else
         {
           cur.perms = symperms (tok + 1);
           DEBUGP (("perms %0o; ", cur.perms));
         }

      error = ignore = 0;       /* Erroneous and ignoring entries are
                                   treated equally for now.  */
      year = hour = min = sec = 0; /* Silence the compiler.  */
      month = day = 0;
      ptype = TT_DAY;
      next = -1;
      /* While there are tokens on the line, parse them.  Next is the
         number of tokens left until the filename.

         Use the month-name token as the "anchor" (the place where the
         position wrt the file name is "known").  When a month name is
         encountered, `next' is set to 5.  Also, the preceding
         characters are parsed to get the file size.

         This tactic is quite dubious when it comes to
         internationalization issues (non-English month names), but it
         works for now.  */
      tok = line;
      while (ptok = tok,
             (tok = strtok (NULL, " ")) != NULL)
        {
          --next;
          if (next < 0)         /* a month name was not encountered */
            {
              for (i = 0; i < 12; i++)
                if (!strcasecmp (tok, months[i]))
                  break;
              /* If we got a month, it means the token before it is the
                 size, and the filename is three tokens away.  */
              if (i != 12)
                {
                  wgint size;

                  /* Parse the previous token with str_to_wgint.  */
                  if (ptok == line)
                    {
                      /* Something has gone wrong during parsing. */
                      error = 1;
                      break;
                    }
                  errno = 0;
                  size = str_to_wgint (ptok, NULL, 10);
                  if (size == WGINT_MAX && errno == ERANGE)
                    /* Out of range -- ignore the size.  #### Should
                       we refuse to start the download.  */
                    cur.size = 0;
                  else
                    cur.size = size;
                  DEBUGP (("size: %s; ", number_to_static_string(cur.size)));

                  month = i;
                  next = 5;
                  DEBUGP (("month: %s; ", months[month]));
                }
            }
          else if (next == 4)   /* days */
            {
              if (tok[1])       /* two-digit... */
                day = 10 * (*tok - '0') + tok[1] - '0';
              else              /* ...or one-digit */
                day = *tok - '0';
              DEBUGP (("day: %d; ", day));
            }
          else if (next == 3)
            {
              /* This ought to be either the time, or the year.  Let's
                 be flexible!

                 If we have a number x, it's a year.  If we have x:y,
                 it's hours and minutes.  If we have x:y:z, z are
                 seconds.  */
              year = 0;
              min = hour = sec = 0;
              /* We must deal with digits.  */
              if (c_isdigit (*tok))
                {
                  /* Suppose it's year.  */
                  for (; c_isdigit (*tok); tok++)
                    year = (*tok - '0') + 10 * year;
                  if (*tok == ':')
                    {
                      /* This means these were hours!  */
                      hour = year;
                      year = 0;
                      ptype = TT_HOUR_MIN;
                      ++tok;
                      /* Get the minutes...  */
                      for (; c_isdigit (*tok); tok++)
                        min = (*tok - '0') + 10 * min;
                      if (*tok == ':')
                        {
                          /* ...and the seconds.  */
                          ++tok;
                          for (; c_isdigit (*tok); tok++)
                            sec = (*tok - '0') + 10 * sec;
                        }
                    }
                }
              if (year)
                DEBUGP (("year: %d (no tm); ", year));
              else
                DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
            }
          else if (next == 2)    /* The file name */
            {
              int fnlen;
              char *p;

              /* Since the file name may contain a SPC, it is possible
                 for strtok to handle it wrong.  */
              fnlen = strlen (tok);
              if (fnlen < len - (tok - line))
                {
                  /* So we have a SPC in the file name.  Restore the
                     original.  */
                  tok[fnlen] = ' ';
                  /* If the file is a symbolic link, it should have a
                     ` -> ' somewhere.  */
                  if (cur.type == FT_SYMLINK)
                    {
                      p = strstr (tok, " -> ");
                      if (!p)
                        {
                          error = 1;
                          break;
                        }
                      cur.linkto = xstrdup (p + 4);
                      DEBUGP (("link to: %s\n", cur.linkto));
                      /* And separate it from the file name.  */
                      *p = '\0';
                    }
                }
              /* If we have the filename, add it to the list of files or
                 directories.  */
              /* "." and ".." are an exception!  */
              if (!strcmp (tok, ".") || !strcmp (tok, ".."))
                {
                  DEBUGP (("\nIgnoring `.' and `..'; "));
                  ignore = 1;
                  break;
                }
              /* Some FTP sites choose to have ls -F as their default
                 LIST output, which marks the symlinks with a trailing
                 `@', directory names with a trailing `/' and
                 executables with a trailing `*'.  This is no problem
                 unless encountering a symbolic link ending with `@',
                 or an executable ending with `*' on a server without
                 default -F output.  I believe these cases are very
                 rare.  */
              fnlen = strlen (tok); /* re-calculate `fnlen' */
              cur.name = xmalloc (fnlen + 1);
              memcpy (cur.name, tok, fnlen + 1);
              if (fnlen)
                {
                  if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
                    {
                      cur.name[fnlen - 1] = '\0';
                      DEBUGP (("trailing `/' on dir.\n"));
                    }
                  else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
                    {
                      cur.name[fnlen - 1] = '\0';
                      DEBUGP (("trailing `@' on link.\n"));
                    }
                  else if (cur.type == FT_PLAINFILE
                           && (cur.perms & 0111)
                           && cur.name[fnlen - 1] == '*')
                    {
                      cur.name[fnlen - 1] = '\0';
                      DEBUGP (("trailing `*' on exec.\n"));
                    }
                } /* if (fnlen) */
              else
                error = 1;
              break;
            }
          else
            abort ();
        } /* while */

      if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
        error = 1;

      DEBUGP (("%s\n", cur.name ? cur.name : ""));

      if (error || ignore)
        {
          DEBUGP (("Skipping.\n"));
          xfree_null (cur.name);
          xfree_null (cur.linkto);
          continue;
        }

      if (!dir)
        {
          l = dir = xnew (struct fileinfo);
          memcpy (l, &cur, sizeof (cur));
          l->prev = l->next = NULL;
        }
      else
        {
Exemple #18
0
uerr_t
retrieve_url (const char *origurl, char **file, char **newloc,
              const char *refurl, int *dt, bool recursive)
{
  uerr_t result;
  char *url;
  bool location_changed;
  int dummy;
  char *mynewloc, *proxy;
  struct url *u, *proxy_url;
  int up_error_code;            /* url parse error code */
  char *local_file;
  int redirection_count = 0;

  bool post_data_suspended = false;
  char *saved_post_data = NULL;
  char *saved_post_file_name = NULL;

  /* If dt is NULL, use local storage.  */
  if (!dt)
    {
      dt = &dummy;
      dummy = 0;
    }
  url = xstrdup (origurl);
  if (newloc)
    *newloc = NULL;
  if (file)
    *file = NULL;

  u = url_parse (url, &up_error_code);
  if (!u)
    {
      logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
      xfree (url);
      return URLERROR;
    }

  if (!refurl)
    refurl = opt.referer;

 redirected:

  result = NOCONERROR;
  mynewloc = NULL;
  local_file = NULL;
  proxy_url = NULL;

  proxy = getproxy (u);
  if (proxy)
    {
      /* Parse the proxy URL.  */
      proxy_url = url_parse (proxy, &up_error_code);
      if (!proxy_url)
        {
          logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
                     proxy, url_error (up_error_code));
          xfree (url);
          RESTORE_POST_DATA;
          return PROXERR;
        }
      if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
        {
          logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
          url_free (proxy_url);
          xfree (url);
          RESTORE_POST_DATA;
          return PROXERR;
        }
    }

  if (u->scheme == SCHEME_HTTP
#ifdef HAVE_SSL
      || u->scheme == SCHEME_HTTPS
#endif
      || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
    {
      result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
    }
  else if (u->scheme == SCHEME_FTP)
    {
      /* If this is a redirection, temporarily turn off opt.ftp_glob
         and opt.recursive, both being undesirable when following
         redirects.  */
      bool oldrec = recursive, glob = opt.ftp_glob;
      if (redirection_count)
        oldrec = glob = false;

      result = ftp_loop (u, dt, proxy_url, recursive, glob);
      recursive = oldrec;

      /* There is a possibility of having HTTP being redirected to
         FTP.  In these cases we must decide whether the text is HTML
         according to the suffix.  The HTML suffixes are `.html',
         `.htm' and a few others, case-insensitive.  */
      if (redirection_count && local_file && u->scheme == SCHEME_FTP)
        {
          if (has_html_suffix_p (local_file))
            *dt |= TEXTHTML;
        }
    }

  if (proxy_url)
    {
      url_free (proxy_url);
      proxy_url = NULL;
    }

  location_changed = (result == NEWLOCATION);
  if (location_changed)
    {
      char *construced_newloc;
      struct url *newloc_parsed;

      assert (mynewloc != NULL);

      if (local_file)
        xfree (local_file);

      /* The HTTP specs only allow absolute URLs to appear in
         redirects, but a ton of boneheaded webservers and CGIs out
         there break the rules and use relative URLs, and popular
         browsers are lenient about this, so wget should be too. */
      construced_newloc = uri_merge (url, mynewloc);
      xfree (mynewloc);
      mynewloc = construced_newloc;

      /* Now, see if this new location makes sense. */
      newloc_parsed = url_parse (mynewloc, &up_error_code);
      if (!newloc_parsed)
        {
          logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
                     url_error (up_error_code));
          url_free (u);
          xfree (url);
          xfree (mynewloc);
          RESTORE_POST_DATA;
          return result;
        }

      /* Now mynewloc will become newloc_parsed->url, because if the
         Location contained relative paths like .././something, we
         don't want that propagating as url.  */
      xfree (mynewloc);
      mynewloc = xstrdup (newloc_parsed->url);

      /* Check for max. number of redirections.  */
      if (++redirection_count > opt.max_redirect)
        {
          logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
                     opt.max_redirect);
          url_free (newloc_parsed);
          url_free (u);
          xfree (url);
          xfree (mynewloc);
          RESTORE_POST_DATA;
          return WRONGCODE;
        }

      xfree (url);
      url = mynewloc;
      url_free (u);
      u = newloc_parsed;

      /* If we're being redirected from POST, we don't want to POST
         again.  Many requests answer POST with a redirection to an
         index page; that redirection is clearly a GET.  We "suspend"
         POST data for the duration of the redirections, and restore
         it when we're done. */
      if (!post_data_suspended)
        SUSPEND_POST_DATA;

      goto redirected;
    }

  if (local_file)
    {
      if (*dt & RETROKF)
        {
          register_download (u->url, local_file);
          if (redirection_count && 0 != strcmp (origurl, u->url))
            register_redirection (origurl, u->url);
          if (*dt & TEXTHTML)
            register_html (u->url, local_file);
        }
    }

  if (file)
    *file = local_file ? local_file : NULL;
  else
    xfree_null (local_file);

  url_free (u);

  if (redirection_count)
    {
      if (newloc)
        *newloc = url;
      else
        xfree (url);
    }
  else
    {
      if (newloc)
        *newloc = NULL;
      xfree (url);
    }

  RESTORE_POST_DATA;

  return result;
}
static void
tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx)
{
  char *name = find_attr (tag, "name", NULL);
  char *http_equiv = find_attr (tag, "http-equiv", NULL);

  if (http_equiv && 0 == strcasecmp (http_equiv, "refresh"))
    {
      /* Some pages use a META tag to specify that the page be
         refreshed by a new page after a given number of seconds.  The
         general format for this is:

           <meta http-equiv=Refresh content="NUMBER; URL=index2.html">

         So we just need to skip past the "NUMBER; URL=" garbage to
         get to the URL.  */

      struct urlpos *entry;
      int attrind;
      int timeout = 0;
      char *p;

      char *refresh = find_attr (tag, "content", &attrind);
      if (!refresh)
        return;

      for (p = refresh; c_isdigit (*p); p++)
        timeout = 10 * timeout + *p - '0';
      if (*p++ != ';')
        return;

      while (c_isspace (*p))
        ++p;
      if (!(   c_toupper (*p)       == 'U'
            && c_toupper (*(p + 1)) == 'R'
            && c_toupper (*(p + 2)) == 'L'
            &&          *(p + 3)  == '='))
        return;
      p += 4;
      while (c_isspace (*p))
        ++p;

      entry = append_url (p, ATTR_POS(tag,attrind,ctx),
                          ATTR_SIZE(tag,attrind), ctx);
      if (entry)
        {
          entry->link_refresh_p = 1;
          entry->refresh_timeout = timeout;
          entry->link_expect_html = 1;
        }
    }
  else if (http_equiv && 0 == strcasecmp (http_equiv, "content-type"))
    {
      /* Handle stuff like:
         <meta http-equiv="Content-Type" content="text/html; charset=CHARSET"> */

      char *mcharset;
      char *content = find_attr (tag, "content", NULL);
      if (!content)
        return;

      mcharset = parse_charset (content);
      if (!mcharset)
        return;

      xfree_null (meta_charset);
      meta_charset = mcharset;
    }
  else if (name && 0 == strcasecmp (name, "robots"))
    {
      /* Handle stuff like:
         <meta name="robots" content="index,nofollow"> */
      char *content = find_attr (tag, "content", NULL);
      if (!content)
        return;
      if (!strcasecmp (content, "none"))
        ctx->nofollow = true;
      else
        {
          while (*content)
            {
              char *end;
              /* Skip any initial whitespace. */
              content += strspn (content, " \f\n\r\t\v");
              /* Find the next occurrence of ',' or whitespace,
               * or the end of the string.  */
              end = content + strcspn (content, ", \f\n\r\t\v");
              if (!strncasecmp (content, "nofollow", end - content))
                ctx->nofollow = true;
              /* Skip past the next comma, if any. */
              if (*end == ',')
                ++end;
              else
                {
                  end = strchr (end, ',');
                  if (end)
                    ++end;
                  else
                    end = content + strlen (content);
                }
              content = end;
            }
        }
    }
}