/*
 * wget applet entry point.
 *
 * Parses the command line, then retrieves the single URL found at
 * argv[optind] and writes the received data to the output file
 * (or to stdout for "-O -").
 *
 * Transport selection:
 *  - HTTP is spoken when the target is not FTP, or when a proxy taken
 *    from the http_proxy/ftp_proxy environment variable is in use;
 *  - otherwise the FTP session is set up by prepare_ftp_session().
 *
 * HTTP specifics handled here: request generation (GET or POST),
 * optional basic authentication, "Range:" for -c, up to 5 Location:
 * redirections, Content-Length and chunked Transfer-Encoding headers.
 *
 * argc is unused; argv is the NULL-terminated argument vector.
 *
 * Returns EXIT_SUCCESS. Failures are reported through
 * bb_error_msg_and_die() and the x*() helpers (presumably these
 * terminate the process - confirm against libbb).
 */
int wget_main(int argc UNUSED_PARAM, char **argv)
{
    char buf[512];
    struct host_info server, target;
    len_and_sockaddr *lsa;
    unsigned opt;
    int redir_limit;
    char *proxy = NULL;
    char *dir_prefix = NULL;
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
    char *post_data;
    char *extra_headers = NULL;
    llist_t *headers_llist = NULL;
#endif
    FILE *sfp;                      /* socket to web/ftp server         */
    FILE *dfp;                      /* socket to ftp server (data)      */
    char *fname_out;                /* where to direct output (-O)      */
    int output_fd = -1;             /* < 0: output file not opened yet  */
    bool use_proxy;                 /* Use proxies if env vars are set  */
    const char *proxy_flag = "on";  /* -Y argument; "off" disables proxy */
    const char *user_agent = "Wget";/* "User-Agent" header field        */

    /* Header names/values we recognize; KEY_xxx is 1-based index into it */
    static const char keywords[] ALIGN1 =
        "content-length\0""transfer-encoding\0""chunked\0""location\0";
    enum {
        KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
    };
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
    static const char wget_longopts[] ALIGN1 =
        /* name, has_arg, val */
        "continue\0"         No_argument       "c"
        "spider\0"           No_argument       "s"
        "quiet\0"            No_argument       "q"
        "output-document\0"  Required_argument "O"
        "directory-prefix\0" Required_argument "P"
        "proxy\0"            Required_argument "Y"
        "user-agent\0"       Required_argument "U"
        /* Ignored: */
        // "tries\0"            Required_argument "t"
        // "timeout\0"          Required_argument "T"
        /* Ignored (we always use PASV): */
        "passive-ftp\0"      No_argument       "\xff"
        "header\0"           Required_argument "\xfe"
        "post-data\0"        Required_argument "\xfd"
        /* Ignored (we don't do ssl) */
        "no-check-certificate\0" No_argument   "\xfc"
        ;
#endif

    INIT_G();

#if ENABLE_FEATURE_WGET_LONG_OPTIONS
    applet_long_options = wget_longopts;
#endif
    /* server.allocated = target.allocated = NULL; */
    opt_complementary = "-1" IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
    opt = getopt32(argv, "csqO:P:Y:U:" /*ignored:*/ "t:T:",
                   &fname_out, &dir_prefix,
                   &proxy_flag, &user_agent,
                   NULL, /* -t RETRIES */
                   NULL /* -T NETWORK_READ_TIMEOUT */
                   IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
                   IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
                  );
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
    if (headers_llist) {
        /* Join all --header values into one "H1\r\nH2\r\n..." string.
         * Size: each header + CRLF, plus one byte for the final NUL. */
        int size = 1;
        char *cp;
        llist_t *ll = headers_llist;
        while (ll) {
            size += strlen(ll->data) + 2;
            ll = ll->link;
        }
        extra_headers = cp = xmalloc(size);
        while (headers_llist) {
            cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
        }
    }
#endif

    /* TODO: compat issue: should handle "wget URL1 URL2..." */

    target.user = NULL;
    parse_url(argv[optind], &target);

    /* Use the proxy if necessary */
    use_proxy = (strcmp(proxy_flag, "off") != 0);
    if (use_proxy) {
        proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
        if (proxy && proxy[0]) {
            server.user = NULL;
            parse_url(proxy, &server);
        } else {
            /* Env var is unset or empty: connect directly */
            use_proxy = 0;
        }
    }
    if (!use_proxy) {
        /* No proxy: the server we connect to is the target itself */
        server.port = target.port;
        if (ENABLE_FEATURE_IPV6) {
            /* Private copy: target.host gets modified just below */
            server.host = xstrdup(target.host);
        } else {
            server.host = target.host;
        }
    }

    if (ENABLE_FEATURE_IPV6)
        strip_ipv6_scope_id(target.host);

    /* Guess an output filename, if there was no -O FILE */
    if (!(opt & WGET_OPT_OUTNAME)) {
        fname_out = bb_get_last_path_component_nostrip(target.path);
        /* handle "wget http://kernel.org//" */
        if (fname_out[0] == '/' || !fname_out[0])
            fname_out = (char*)"index.html";
        /* -P DIR is considered only if there was no -O FILE */
        if (dir_prefix)
            fname_out = concat_path_file(dir_prefix, fname_out);
    } else {
        if (LONE_DASH(fname_out)) {
            /* -O - : write to stdout; resuming makes no sense there */
            output_fd = 1;
            opt &= ~WGET_OPT_CONTINUE;
        }
    }
#if ENABLE_FEATURE_WGET_STATUSBAR
    G.curfile = bb_get_last_path_component_nostrip(fname_out);
#endif

    /* Impossible?
    if ((opt & WGET_OPT_CONTINUE) && !fname_out)
    	bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
    */

    /* Determine where to start transfer */
    if (opt & WGET_OPT_CONTINUE) {
        output_fd = open(fname_out, O_WRONLY);
        if (output_fd >= 0) {
            /* Resume after what we already have; fd stays at EOF */
            G.beg_range = xlseek(output_fd, 0, SEEK_END);
        }
        /* File doesn't exist. We do not create file here yet.
         * We are not sure it exists on remote side */
    }

    redir_limit = 5;
resolve_lsa:
    lsa = xhost2sockaddr(server.host, server.port);
    if (!(opt & WGET_OPT_QUIET)) {
        char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
        fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
        free(s);
    }
establish_session:
    if (use_proxy || !target.is_ftp) {
        /*
         *  HTTP session
         */
        char *str;
        int status;
        /* The request body is sent whenever --post-data is given,
         * so the request line must say POST in the proxy case too. */
        const char *method = (opt & WGET_OPT_POST_DATA) ? "POST" : "GET";

        /* Open socket to http server */
        sfp = open_socket(lsa);

        /* Send HTTP request */
        if (use_proxy) {
            /* Proxies get the absolute URL of the target */
            fprintf(sfp, "%s %stp://%s/%s HTTP/1.1\r\n",
                    method,
                    target.is_ftp ? "f" : "ht", target.host,
                    target.path);
        } else {
            fprintf(sfp, "%s /%s HTTP/1.1\r\n", method, target.path);
        }

        fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
                target.host, user_agent);

#if ENABLE_FEATURE_WGET_AUTHENTICATION
        if (target.user) {
            /* "+6" skips "Proxy-": this emits "Authorization: Basic ..." */
            fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
                    base64enc_512(buf, target.user));
        }
        if (use_proxy && server.user) {
            fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
                    base64enc_512(buf, server.user));
        }
#endif

        if (G.beg_range)
            fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
        if (extra_headers)
            fputs(extra_headers, sfp);

        if (opt & WGET_OPT_POST_DATA) {
            char *estr = URL_escape(post_data);
            fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
            /* %u requires an unsigned argument */
            fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
                    (unsigned) strlen(estr), estr);
            /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/
            /*fprintf(sfp, "%s\r\n", estr);*/
            free(estr);
        } else
#endif
        {   /* If "Connection:" is needed, document why */
            fprintf(sfp, /* "Connection: close\r\n" */ "\r\n");
        }

        /*
         * Retrieve HTTP response line and check for "200" status code.
         */
read_response:
        if (fgets(buf, sizeof(buf), sfp) == NULL)
            bb_error_msg_and_die("no response from server");

        /* Status line is "HTTP/x.y NNN reason": skip the first word */
        str = buf;
        str = skip_non_whitespace(str);
        str = skip_whitespace(str);
        // FIXME: no error check
        // xatou wouldn't work: "200 OK"
        status = atoi(str);
        switch (status) {
        case 0:
        case 100:
            while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
                /* eat all remaining headers */;
            goto read_response;
        case 200:
            if (G.beg_range) {
                /* We asked for "Range: bytes=N-" but got 200, not 206:
                 * the server ignored Range and sends the whole file.
                 * The output fd sits at EOF of the partial file, so
                 * appending would corrupt it. Forget the resume state;
                 * the file is reopened with truncation further down
                 * (after the --spider check, so spider never touches it).
                 */
                if (!(opt & WGET_OPT_QUIET))
                    fprintf(stderr, "Server ignored Range, restarting download\n");
                xclose(output_fd);
                output_fd = -1;
                G.beg_range = 0;
            }
            break;
        /*
        Response 204 doesn't say "null file", it says "metadata
        has changed but data didn't":

        "10.2.5 204 No Content
        The server has fulfilled the request but does not need to return
        an entity-body, and might want to return updated metainformation.
        The response MAY include new or updated metainformation in the form
        of entity-headers, which if present SHOULD be associated with
        the requested variant.

        If the client is a user agent, it SHOULD NOT change its document
        view from that which caused the request to be sent. This response
        is primarily intended to allow input for actions to take place
        without causing a change to the user agent's active document view,
        although any new or updated metainformation SHOULD be applied
        to the document currently in the user agent's active view.

        The 204 response MUST NOT include a message-body, and thus
        is always terminated by the first empty line after the header fields."

        However, in real world it was observed that some web servers
        (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
        */
        case 204:
            break;
        case 300:	/* redirection */
        case 301:
        case 302:
        case 303:
            break;
        case 206:
            /* Partial content is fine only if we asked for it */
            if (G.beg_range)
                break;
        /* fall through */
        default:
            bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
        }

        /*
         * Retrieve HTTP headers.
         */
        while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
            /* gethdr converted "FOO:" string to lowercase */
            smalluint key;
            /* strip trailing whitespace */
            char *s = strchrnul(str, '\0') - 1;
            while (s >= str && (*s == ' ' || *s == '\t')) {
                *s = '\0';
                s--;
            }
            /* buf holds the header name, str its value */
            key = index_in_strings(keywords, buf) + 1;
            if (key == KEY_content_length) {
                G.content_len = BB_STRTOOFF(str, NULL, 10);
                if (G.content_len < 0 || errno) {
                    bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
                }
                G.got_clen = 1;
                continue;
            }
            if (key == KEY_transfer_encoding) {
                if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
                    bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
                G.chunked = G.got_clen = 1;
            }
            if (key == KEY_location && status >= 300) {
                if (--redir_limit == 0)
                    bb_error_msg_and_die("too many redirections");
                fclose(sfp);
                /* Length info belonged to the redirect response */
                G.got_clen = 0;
                G.chunked = 0;
                if (str[0] == '/')
                    /* free(target.allocated); */
                    target.path = /* target.allocated = */ xstrdup(str+1);
                /* lsa stays the same: it's on the same server */
                else {
                    parse_url(str, &target);
                    if (!use_proxy) {
                        server.host = target.host;
                        /* strip_ipv6_scope_id(target.host); - no! */
                        /* we assume remote never gives us IPv6 addr with scope id */
                        server.port = target.port;
                        free(lsa);
                        goto resolve_lsa;
                    } /* else: lsa stays the same: we use proxy */
                }
                goto establish_session;
            }
        }
//		if (status >= 300)
//			bb_error_msg_and_die("bad redirection (no Location: header from server)");

        /* For HTTP, data is pumped over the same connection */
        dfp = sfp;

    } else {
        /*
         *  FTP session
         */
        sfp = prepare_ftp_session(&dfp, &target, lsa);
    }

    if (opt & WGET_OPT_SPIDER) {
        /* --spider: only check that the resource is there */
        if (ENABLE_FEATURE_CLEAN_UP)
            fclose(sfp);
        return EXIT_SUCCESS;
    }

    if (output_fd < 0) {
        int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
        /* compat with wget: -O FILE can overwrite.
         * -c may overwrite too: this is how a download whose
         * "Range:" was ignored by the server restarts from scratch. */
        if (opt & (WGET_OPT_OUTNAME | WGET_OPT_CONTINUE))
            o_flags = O_WRONLY | O_CREAT | O_TRUNC;
        output_fd = xopen(fname_out, o_flags);
    }

    retrieve_file_data(dfp, output_fd);
    xclose(output_fd);

    if (dfp != sfp) {
        /* It's ftp. Close it properly */
        fclose(dfp);
        if (ftpcmd(NULL, NULL, sfp, buf) != 226)
            bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
        /* ftpcmd("QUIT", NULL, sfp, buf); - why bother? */
    }

    return EXIT_SUCCESS;
}
Example #2
0
		"passive-ftp\0"      No_argument       "\xff"
		"header\0"           Required_argument "\xfe"
		"post-data\0"        Required_argument "\xfd"
		/* Ignored (we don't do ssl) */
		"no-check-certificate\0" No_argument   "\xfc"
		;
#endif

	INIT_G();
	IF_WIN32_NET(init_winsock();)

#if ENABLE_FEATURE_WGET_LONG_OPTIONS
	applet_long_options = wget_longopts;
#endif
	/* server.allocated = target.allocated = NULL; */
	opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
	opt = getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
				&fname_out, &dir_prefix,
				&proxy_flag, &user_agent,
				IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
				NULL /* -t RETRIES */
				IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
				IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
				);
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
	if (headers_llist) {
		int size = 1;
		char *cp;
		llist_t *ll = headers_llist;
		while (ll) {
			size += strlen(ll->data) + 2;