Beispiel #1
0
/* Reads the contents of file descriptor FD until it is closed, a read
   error occurs, or (when USE_EXPECTED is non-zero) *LEN is no longer
   smaller than EXPECTED.  The data is read in chunks of at most 8K
   and written to stream FP, which should have been opened for
   writing.  If RBUF is non-NULL and its file descriptor is equal to
   FD, the data still buffered in RBUF is flushed to FP first.  Apart
   from that initial rbuf_flush(), this function will *not* use the
   rbuf_* functions!

   *LEN is set to RESTVAL on entry and is then incremented by the
   size of every chunk handed to FP, so on return it holds RESTVAL
   plus the amount of data processed by this call.

   EXPECTED is passed to progress_create() when opt.verbose is set.
   If USE_EXPECTED is zero, EXPECTED has no other effect and reading
   continues until end of data or an error.  If USE_EXPECTED is
   non-zero, no read asks for more than the remaining
   EXPECTED - *LEN bytes, and an EXPECTED of zero means that no data
   is expected at all.

   If opt.verbose is set, the progress is also shown.  RESTVAL
   represents a value from which to start downloading (which will be
   shown accordingly).  If RESTVAL is non-zero, the stream should have
   been open for appending.

   If ELAPSED is non-NULL, *ELAPSED receives the last value obtained
   from wtimer_elapsed() while downloading (0 if nothing was read
   from FD).

   The function returns 0, -1 or -2 if the connection was closed,
   there was a read error, or if it could not write to the output
   stream, respectively.  Note that when the read loop stops because
   *LEN has reached EXPECTED, the value returned is the (positive)
   size of the last chunk read; callers should treat any non-negative
   result as success.

   IMPORTANT: The function flushes the contents of the buffer in
   rbuf_flush() before actually reading from fd.  If you wish to read
   from fd immediately, flush or discard the buffer.  */
int
get_contents (int fd, FILE *fp, long *len, long restval, long expected,
              struct rbuf *rbuf, int use_expected, long *elapsed)
{
  int res = 0;
  static char c[8192];
  void *progress = NULL;
  struct wget_timer *timer = wtimer_allocate ();
  long dltime = 0, last_dltime = 0;

  *len = restval;

  if (opt.verbose)
    progress = progress_create (restval, expected);

  if (rbuf && RBUF_FD (rbuf) == fd)
    {
      /* Drain whatever is still sitting in RBUF before touching FD.  */
      int sz = 0;
      while ((res = rbuf_flush (rbuf, c, sizeof (c))) != 0)
        {
          fwrite (c, sizeof (char), res, fp);
          *len += res;
          sz += res;
        }
      if (sz)
        fflush (fp);
      /* A failed fwrite/fflush above leaves the stream's error
         indicator set, so one check after the loop is enough.  */
      if (ferror (fp))
        {
          res = -2;
          goto out;
        }
      if (opt.verbose)
        progress_update (progress, sz, 0);
    }

  if (opt.limit_rate)
    limit_bandwidth_reset ();
  wtimer_reset (timer);

  /* Read from fd while there is available data.

     Normally, if expected is 0, it means that it is not known how
     much data is expected.  However, if use_expected is specified,
     then expected being zero means exactly that.  */
  while (!use_expected || (*len < expected))
    {
      /* In use_expected mode never request more than what is still
         missing; the loop condition guarantees that this is > 0.  */
      int amount_to_read = (use_expected
                            ? MIN (expected - *len, sizeof (c))
                            : sizeof (c));
#ifdef HAVE_SSL
      /* RBUF is allowed to be NULL (the flush code above tests for
         it), so it must be checked before its ssl member is read.  */
      if (rbuf && rbuf->ssl != NULL)
        res = ssl_iread (rbuf->ssl, c, amount_to_read);
      else
#endif /* HAVE_SSL */
        res = iread (fd, c, amount_to_read);

      if (res > 0)
        {
          fwrite (c, sizeof (char), res, fp);
          /* Always flush the contents of the network packet.  This
             should not be adverse to performance, as the network
             packets typically won't be too tiny anyway.  */
          fflush (fp);
          if (ferror (fp))
            {
              res = -2;
              goto out;
            }

          /* If bandwidth is not limited, one call to wtimer_elapsed
             is sufficient.  */
          dltime = wtimer_elapsed (timer);
          if (opt.limit_rate)
            {
              /* limit_bandwidth() is given the time spent on this
                 chunk; the timer is read again afterwards so that
                 the next chunk is measured from this point.  */
              limit_bandwidth (res, dltime - last_dltime);
              dltime = wtimer_elapsed (timer);
              last_dltime = dltime;
            }

          if (opt.verbose)
            progress_update (progress, res, dltime);
          *len += res;
        }
      else
        break;
    }
  /* Collapse every negative read result into the single documented
     read-error code.  The -2 write-error path jumps straight to
     `out' and is therefore not affected.  */
  if (res < -1)
    res = -1;

 out:
  if (opt.verbose)
    progress_finish (progress, dltime);
  if (elapsed)
    *elapsed = dltime;
  wtimer_delete (timer);

  return res;
}
/* Go through every file recorded in downloaded_html_list and convert
   the links it contains.  For each file the originating URL is
   looked up in dl_file_url_map, the file is parsed with
   get_urls_html(), every link found is tagged with the kind of
   conversion it needs, and convert_links() is then called to apply
   the changes.  When done, the number of files processed and the
   time taken are logged at LOG_VERBOSE level.  */
void
convert_all_links (void)
{
  struct wget_timer *timer = wtimer_new ();
  slist *entry;
  int nconverted = 0;
  long elapsed_ms;

  /* recursive_retrieve() consistently used slist_prepend(), so the
     list is destructively reversed here to get it in the right
     order.  */
  downloaded_html_list = slist_nreverse (downloaded_html_list);

  for (entry = downloaded_html_list; entry; entry = entry->next)
    {
      char *file = entry->string;
      /* get_urls_html needs the URL the HTML file was fetched from.  */
      char *url = hash_table_get (dl_file_url_map, file);
      struct urlpos *links, *link;

      if (!url)
        {
          DEBUGP (("Apparently %s has been removed.\n", file));
          continue;
        }

      DEBUGP (("Scanning %s (from %s)\n", file, url));

      /* Parse the HTML file.  meta_disallow_follow is deliberately
         not honored: even if this file's links were not followed,
         links that were followed from other files may still need
         converting.  */
      links = get_urls_html (file, url, NULL);

      for (link = links; link; link = link->next)
        {
          if (link->link_base_p)
            {
              /* The parser has already resolved base references, so
                 the base URL is turned into an empty string.
                 (Perhaps the tag should be removed entirely?)  */
              link->convert = CO_NULLIFY_BASE;
            }
          else
            {
              /* The direction of conversion depends on whether the
                 URL was downloaded: downloaded URLs become relative,
                 all others become complete.  */
              struct url *target = link->url;
              char *local_name = hash_table_get (dl_url_file_map,
                                                 target->url);

              if (!local_name)
                {
                  /* Not downloaded.  Unless the link is already
                     complete (including a full host name), make it
                     so, so that it can be reached while browsing
                     this HTML locally.  */
                  if (!link->link_complete_p)
                    link->convert = CO_CONVERT_TO_COMPLETE;
                  link->local_name = NULL;
                  DEBUGP (("will convert url %s to complete\n", target->url));
                }
              else
                {
                  /* Downloaded.  Convert to relative form even if
                     the URL already is relative, because the local
                     directory structure may not be identical to that
                     on the server (think `-nd', `--cut-dirs',
                     etc.)  */
                  link->convert = CO_CONVERT_TO_RELATIVE;
                  link->local_name = xstrdup (local_name);
                  DEBUGP (("will convert url %s to local %s\n", target->url, local_name));
                }
            }
        }

      /* Apply the conversions to the file, then release the list.  */
      convert_links (file, links);
      ++nconverted;
      free_urlpos (links);
    }

  elapsed_ms = wtimer_elapsed (timer);
  wtimer_delete (timer);
  logprintf (LOG_VERBOSE, _("Converted %d files in %.2f seconds.\n"),
             nconverted, (double) elapsed_ms / 1000);
}