/*
 * Split src_url into the fields of *h.
 *
 * A private copy of src_url is stored in h->allocated (the copy from any
 * previous call is freed first); h->host and h->path point into that copy.
 * h->port and h->is_ftp are chosen from the scheme. Only "http://" and
 * "ftp://" are accepted; anything else is reported through
 * bb_error_msg_and_die(), which is expected not to return.
 *
 * If the authority carries a "user[:password]@" prefix, h->user is replaced
 * by a separately allocated, percent-decoded copy of it. Otherwise h->user
 * is left untouched, so credentials from the original request survive a
 * redirect (code 302).
 *
 * h->user must be NULL or a pointer previously stored by this function when
 * parse_url() is called, because the old value is passed to free().
 * NOTE(review): confirm every caller initialises h->user to NULL, and free
 * it together with h->allocated if leak-free cleanup is wanted.
 */
static void parse_url(const char *src_url, struct host_info *h)
{
	char *url, *sp;

	free(h->allocated);
	h->allocated = url = xstrdup(src_url);

	if (strncmp(url, "http://", 7) == 0) {
		h->port = bb_lookup_port("http", "tcp", 80);
		h->host = url + 7;
		h->is_ftp = 0;
	} else if (strncmp(url, "ftp://", 6) == 0) {
		h->port = bb_lookup_port("ftp", "tcp", 21);
		h->host = url + 6;
		h->is_ftp = 1;
	} else {
		bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
	}

	// FYI:
	// "Real" wget 'http://busybox.net?var=a/b' sends this request:
	//   'GET /?var=a/b HTTP 1.0'
	//   and saves 'index.html?var=a%2Fb' (we save 'b')
	// wget 'http://busybox.net?login=john@doe':
	//   request: 'GET /?login=john@doe HTTP/1.0'
	//   saves: 'index.html?login=john@doe' (we save '?login=john@doe')
	// wget 'http://busybox.net#test/test':
	//   request: 'GET / HTTP/1.0'
	//   saves: 'index.html' (we save 'test')
	//
	// We also don't add unique .N suffix if file exists...

	// The authority ends at the first '/', '?' or '#', whichever comes first.
	sp = h->host + strcspn(h->host, "/?#");
	if (*sp == '\0') {
		h->path = "";
	} else if (*sp == '/') {
		*sp = '\0';
		h->path = sp + 1;
	} else {
		// '#' or '?'
		// http://busybox.net?login=john@doe is a valid URL.
		// There is no byte to overwrite with NUL without losing the
		// delimiter, so shift the host one byte left (over the second
		// '/' of "://") and terminate it just before the delimiter.
		// memmove converts to:
		// http:/busybox.nett?login=john@doe...
		memmove(h->host - 1, h->host, sp - h->host);
		h->host--;
		sp[-1] = '\0';
		h->path = sp;
	}

	// We used to set h->user to NULL here, but this interferes
	// with handling of code 302 ("object was moved")

	sp = strrchr(h->host, '@');
	if (sp != NULL) {
		// URL-decode "user:password" string before base64-encoding:
		// wget http://test:my%20pass@example.com should send
		// Authorization: Basic dGVzdDpteSBwYXNz
		// which decodes to "test:my pass".
		// Standard wget and curl do this too.
		*sp = '\0';
		// Keep the credentials in their own allocation: h->allocated is
		// freed by the next parse_url() call (e.g. on a redirect), and a
		// redirect URL without "user@" would otherwise leave h->user
		// pointing into freed memory.
		// The cast only drops a possible const qualifier on the field.
		free((char *)h->user);
		h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
		h->host = sp + 1;
	}
	// else: h->user stays NULL, or stays as set by the request that
	// preceded a redirect.
}
/* basename_from_path() parses hostport_abs_path * to get name. * If there are no path segments in abs_path, name is the host. * Otherwise if the host is NOT idisk.mac.com OR abs_path is * only one segment, name is the last path segment. * Otherwise (host is idisk.mac.com AND there are multiple * path segements), name is the first path segment concatenated * with the last path segment separated with a dash character. * name is assumed to be char[MAXNAMLEN]. */ static int basename_from_path(const char *hostport_abs_path, char *name, size_t maxlength) { int error; char *path; char *host; char *firstPathSegment; char *lastPathSegment; int length; char *colon; char *slash; char *firstChar; char *lastChar; error = 0; path = host = firstPathSegment = lastPathSegment = NULL; /* validate input parameters */ if ( (hostport_abs_path == NULL) || (name == NULL) ) { error = EINVAL; goto exit; } /* no output name yet */ *name = '\0'; /* get length of input */ length = strlen(hostport_abs_path); if ( length == 0 ) { error = EINVAL; goto exit; } /* allocate space for path */ path = malloc(length + 2); /* one extra for slash if needed */ if ( path == NULL ) { error = ENOMEM; goto exit; } /* and make a private copy of hostport_abs_path*/ strlcpy(path, hostport_abs_path, length + 2); /* add a trailing slash if needed */ if ( path[length] != '/' ) { strlcat(path, "/", length + 2); } /* find the first colon (if any) and the first slash */ colon = strchr(path, ':'); slash = strchr(path, '/'); /* get the host name */ if ( (colon == NULL) || (colon > slash) ) { /* if no colon, or the colon is after the slash, * then there is no port so the host is everything * up to the slash */ host = CopySegment(path, slash); } else { /* there is a port so the host is everything * up to the colon */ host = CopySegment(path, colon); } /* find first path segment (if any) */ lastChar = slash; ParsePathSegment(lastChar, &firstChar, &lastChar); if (firstChar != lastChar) { /* copy first path segment */ 
firstPathSegment = CopySegment(firstChar, lastChar); percent_decode_in_place(firstPathSegment); /* find last path segment (if any) */ while ( *lastChar != '\0' ) { ParsePathSegment(lastChar, &firstChar, &lastChar); if (firstChar != lastChar) { if ( lastPathSegment != NULL ) { /* free up the previous lastPathSegment */ free(lastPathSegment); } /* copy (new) last path segment */ lastPathSegment = CopySegment(firstChar, lastChar); } } if ( lastPathSegment != NULL ) { percent_decode_in_place(lastPathSegment); /* name is lastPathSegment */ if ( (strlen(lastPathSegment) + 1) > MAXNAMLEN ) { error = ENAMETOOLONG; goto exit; } strlcpy(name, lastPathSegment, maxlength); } else { /* no last path segment -- name is firstPathSegment*/ if ( (strlen(firstPathSegment) + 1) > MAXNAMLEN ) { error = ENAMETOOLONG; goto exit; } strlcpy(name, firstPathSegment, maxlength); } } else { /* no path segments -- name is host */ if ( (strlen(host) + 1) > MAXNAMLEN ) { error = ENAMETOOLONG; goto exit; } strlcpy(name, host, maxlength); } exit: /* free up any memory and return any errors */ if ( path != NULL ) { free(path); } if ( host != NULL ) { free(host); } if ( firstPathSegment != NULL ) { free(firstPathSegment); } if ( lastPathSegment != NULL ) { free(lastPathSegment); } return ( error ); }