static int descend_redirect_p (const char *redirected, const char *original, int depth, struct url *start_url_parsed, struct hash_table *blacklist) { struct url *orig_parsed, *new_parsed; struct urlpos *upos; int success; orig_parsed = url_parse (original, NULL); assert (orig_parsed != NULL); new_parsed = url_parse (redirected, NULL); assert (new_parsed != NULL); upos = xmalloc (sizeof (struct urlpos)); memset (upos, 0, sizeof (*upos)); upos->url = new_parsed; success = download_child_p (upos, orig_parsed, depth, start_url_parsed, blacklist); url_free (orig_parsed); url_free (new_parsed); xfree (upos); if (!success) DEBUGP (("Redirection \"%s\" failed the test.\n", redirected)); return success; }
/*
 * Parse RAWURL into a zero-initialized `struct url` and pass the result to
 * url_block() through the repeat() wrapper.
 *
 * The return value of url_parse() is not checked.
 * NOTE(review): repeat() is defined elsewhere; presumably it evaluates its
 * argument more than once (test/benchmark driver) -- confirm.
 */
static void do_url_block (const char *rawurl)
{
  struct url url = {0};
  url_parse (&url, rawurl);
  repeat (url_block (&url));
}
/*
 * Parse RAWURL into a zero-initialized `struct url` and pass the result to
 * url_public_suffix() through the repeat() wrapper.
 *
 * The return value of url_parse() is not checked.
 * NOTE(review): repeat() is defined elsewhere; presumably it evaluates its
 * argument more than once (test/benchmark driver) -- confirm.
 */
static void do_url_public_suffix (const char *rawurl)
{
  struct url url = {0};
  url_parse (&url, rawurl);
  repeat (url_public_suffix (&url));
}
/* ** If the "proxy" setting is defined, then change the URL settings ** (initialized by a prior call to url_parse()) so that the HTTP ** header will be appropriate for the proxy and so that the TCP/IP ** connection will be opened to the proxy rather than to the server. ** ** If zMsg is not NULL and a proxy is used, then print zMsg followed ** by the canonical name of the proxy (with userid and password suppressed). */ void url_enable_proxy(const char *zMsg){ const char *zProxy; zProxy = zProxyOpt; if( zProxy==0 ){ zProxy = db_get("proxy", 0); if( zProxy==0 || zProxy[0]==0 || is_truth(zProxy) ){ zProxy = fossil_getenv("http_proxy"); } } if( zProxy && zProxy[0] && !is_false(zProxy) ){ char *zOriginalUrl = g.urlCanonical; char *zOriginalHost = g.urlHostname; char *zOriginalUser = g.urlUser; char *zOriginalPasswd = g.urlPasswd; g.urlUser = 0; g.urlPasswd = ""; url_parse(zProxy); if( zMsg ) fossil_print("%s%s\n", zMsg, g.urlCanonical); g.urlPath = zOriginalUrl; g.urlHostname = zOriginalHost; if( g.urlUser ){ char *zCredentials1 = mprintf("%s:%s", g.urlUser, g.urlPasswd); char *zCredentials2 = encode64(zCredentials1, -1); g.urlProxyAuth = mprintf("Basic %z", zCredentials2); free(zCredentials1); } g.urlUser = zOriginalUser; g.urlPasswd = zOriginalPasswd; } }
/* ** COMMAND: test-urlparser ** ** Usage: %fossil test-urlparser URL ?options? ** ** --remember Store results in last-sync-url ** --prompt-pw Prompt for password if missing */ void cmd_test_urlparser(void){ int i; unsigned fg = 0; url_proxy_options(); if( find_option("remember",0,0) ){ db_must_be_within_tree(); fg |= URL_REMEMBER; } if( find_option("prompt-pw",0,0) ) fg |= URL_PROMPT_PW; if( g.argc!=3 && g.argc!=4 ){ usage("URL"); } url_parse(g.argv[2], fg); for(i=0; i<2; i++){ fossil_print("g.url.isFile = %d\n", g.url.isFile); fossil_print("g.url.isHttps = %d\n", g.url.isHttps); fossil_print("g.url.isSsh = %d\n", g.url.isSsh); fossil_print("g.url.protocol = %s\n", g.url.protocol); fossil_print("g.url.name = %s\n", g.url.name); fossil_print("g.url.port = %d\n", g.url.port); fossil_print("g.url.dfltPort = %d\n", g.url.dfltPort); fossil_print("g.url.hostname = %s\n", g.url.hostname); fossil_print("g.url.path = %s\n", g.url.path); fossil_print("g.url.user = %s\n", g.url.user); fossil_print("g.url.passwd = %s\n", g.url.passwd); fossil_print("g.url.canonical = %s\n", g.url.canonical); fossil_print("g.url.fossil = %s\n", g.url.fossil); fossil_print("g.url.flags = 0x%02x\n", g.url.flags); if( g.url.isFile || g.url.isSsh ) break; if( i==0 ){ fossil_print("********\n"); url_enable_proxy("Using proxy: "); } } }
int WebSession::Recv() { m_content = NULL; m_contentLength = 0; int r = socket_select_read(m_sock, SERVER_TIMEOUT); if(r <= 0) return 0==r ? ERROR_RECV_TIMEOUT : r; r = http_server_recv(m_http); if(r < 0) return r; void* url = url_parse(http_server_get_path(m_http)); for(int i=0; i<url_getparam_count(url); i++) { const char *name, *value; if(0 != url_getparam(url, i, &name, &value)) continue; m_params.insert(std::make_pair(std::string(name), std::string(value))); } m_path.assign(url_getpath(url)); url_free(url); http_server_get_content(m_http, &m_content, &m_contentLength); if(m_contentLength > 0 && m_contentLength < 2*1024) { printf("%s\n", (const char*)m_content); } return 0; }
/*
 * Prepare REQ for sending: parse req->url, store a private copy of the
 * request URI ("/" when the URL yields none) in req->uri, create the
 * connection object for the URL's host and port (80 when the URL gives no
 * port) and initialise the header/response buffers and the header table.
 *
 * Returns true on success.  Returns false when the URL cannot be parsed or
 * the connection object cannot be created; in that case nothing allocated
 * here is left behind and req->uri is NULL.
 */
static bool http_request_init(http_request_t *req)
{
    url_t *url = url_parse(req->url);
    if (!url) {
        return false;
    }

    char *uri = url_get_uri(url);
    req->uri = uri ? strdup(uri) : strdup("/");
    free(uri); /* free(NULL) is a no-op */

    http_conn_t *conn = http_conn_new(url->host, url->port ? url->port : 80);
    assert(conn != NULL);
    if (!conn) {
        /* Reached only when asserts are compiled out: release what was
         * allocated above instead of leaking it. */
        free(req->uri);
        req->uri = NULL;
        url_free(url);
        return false;
    }
    req->conn = conn;
    url_free(url);

    sstring_init(&req->header, 200);
    sstring_init(&req->res_header, 512);
    sstring_init(&req->response, 2048);
    req->ht_headers = hash_table_new(20, free);
    req->error = NULL;
    return true;
}
/* * Determine the URL of the boot server from the 'file' parameter to OBP, * the SbootURI or BootFile DHCP options, or the 'bootserver' value entered * either as a "-o" argument or at the interpreter. */ static void determine_bootserver_url(void) { char bs[URL_MAX_STRLEN + 1]; size_t len; url_t url; if (bootinfo_get(BI_BOOTSERVER, bs, &len, NULL) != BI_E_SUCCESS) { /* * If OBP has published a network-boot-file property in * /chosen (or there is a DHCP BootFile or SbootURI vendor * option) and it's a URL, construct the bootserver URL * from it. */ len = URL_MAX_STRLEN; if (bootinfo_get(BI_NETWORK_BOOT_FILE, bs, &len, NULL) != BI_E_SUCCESS) { len = URL_MAX_STRLEN; if (bootinfo_get(BI_BOOTFILE, bs, &len, NULL) != BI_E_SUCCESS) { return; } } if (url_parse(bs, &url) == URL_PARSE_SUCCESS) { (void) bootinfo_put(BI_BOOTSERVER, bs, len, 0); } } }
/** url2http Given a url as a string, it returns a pointer to a dynamically allocated http_request struct. args: url = A string of the following form: <scheme>://<user>:<pass>@<host>:<port>/<url-path> where <scheme> is either 'http' or 'https', and all parts are optional except the scheme, the host and the path. So, the minimum is something like <scheme>://<host>/<url-path> Returns a pointer to the http_request struct generated from the string. */ http_request *url2http(char *url) { http_request *nml; struct in_addr addr; int ssl = 0; parsed_url purl; if (url == NULL) { //print_loc(); io_debug("NULL url passed!\n"); return NULL; } if(strncmp(url, "http", 4)) { /* Must start with 'http'. */ print_loc(); io_debug("Bad URL: %s\n", url); return NULL; } purl = url_parse(url); addr = resolve_cached(purl.host, NULL, NULL); if(!strcmp(purl.scheme, "https")) ssl = 1; return new_http_req(addr, purl.port, purl.host, purl.path, ssl, purl.passwd, purl.user); }
/*
** Work out which user is running the command.  The candidates are tried
** in this order and the first one accepted by attempt_user() wins:
**
**   (1)  The --user and -U command-line options.
**
**   (2)  If the local database is open, check in VVAR.
**
**   (3)  The default user in the repository.
**
**   (4)  The USER environment variable.
**
**   (5)  The USERNAME environment variable.
**
**   (6)  The user extracted from the remote URL (last-sync-url).
**
** The user name is stored in g.zLogin.  The uid is in g.userUid.
** If g.userUid is already set nothing is done.  A login name given
** explicitly but unknown is a fatal error, as is running out of
** candidates.
*/
void user_select(void){
  static const char *const azEnvVar[] = { "USER", "USERNAME" };
  char *zLastSync;
  int iVar;

  if( g.userUid ) return;

  if( g.zLogin ){
    if( attempt_user(g.zLogin) ) return;
    fossil_fatal("no such user: %s", g.zLogin);
  }

  if( g.localOpen && attempt_user(db_lget("default-user",0)) ) return;

  if( attempt_user(db_get("default-user", 0)) ) return;

  for(iVar=0; iVar<2; iVar++){
    if( attempt_user(fossil_getenv(azEnvVar[iVar])) ) return;
  }

  zLastSync = db_get("last-sync-url", 0);
  if( zLastSync ){
    url_parse(zLastSync);
    if( attempt_user(g.urlUser) ) return;
  }

  fossil_print(
    "Cannot figure out who you are!  Consider using the --user\n"
    "command line option, setting your USER environment variable,\n"
    "or setting a default user with \"fossil user default USER\".\n"
  );
  fossil_fatal("cannot determine user");
}
/*
** COMMAND: test-urlparser
*/
void cmd_test_urlparser(void){
  int iPass = 0;

  url_proxy_options();
  if( g.argc!=3 && g.argc!=4 ){
    usage("URL");
  }
  url_parse(g.argv[2]);

  /* Dump the URL globals once as parsed and, unless the URL is a file: or
  ** ssh: URL, a second time after the proxy has been enabled. */
  do{
    fossil_print("g.urlIsFile    = %d\n", g.urlIsFile);
    fossil_print("g.urlIsHttps   = %d\n", g.urlIsHttps);
    fossil_print("g.urlIsSsh     = %d\n", g.urlIsSsh);
    fossil_print("g.urlProtocol  = %s\n", g.urlProtocol);
    fossil_print("g.urlName      = %s\n", g.urlName);
    fossil_print("g.urlPort      = %d\n", g.urlPort);
    fossil_print("g.urlDfltPort  = %d\n", g.urlDfltPort);
    fossil_print("g.urlHostname  = %s\n", g.urlHostname);
    fossil_print("g.urlPath      = %s\n", g.urlPath);
    fossil_print("g.urlUser      = %s\n", g.urlUser);
    fossil_print("g.urlPasswd    = %s\n", g.urlPasswd);
    fossil_print("g.urlCanonical = %s\n", g.urlCanonical);
    fossil_print("g.urlFossil    = %s\n", g.urlFossil);
    if( g.urlIsFile || g.urlIsSsh ) break;
    if( iPass==0 ){
      fossil_print("********\n");
      url_enable_proxy("Using proxy: ");
    }
  }while( ++iPass<2 );
}
/**
 * @brief Try to inherit logging settings from the environment
 *
 * The settings are read from LIBLOG_<name> for a named namespace and from
 * LIBLOG for the unnamed one.  The value has the form "<level>,<uri>":
 * the level is stored in ns->level as soon as the variable is found, the
 * URI is parsed and handed to ll_logger_open().
 *
 * @param [in] ns pointer to logging namespace
 * @return result of ll_logger_open() when the variable holds a parsable URI
 * @retval -1 variable not set, no URI part, URI not parsable, or the
 *            variable name could not be built
 */
static int ll_ns_env(struct ll_namespace *ns)
{
	const char *spec;
	const char *comma;
	struct url *target;
	int rc;

	assert(ns);
	assert(ns->name);

	/* pick the variable that belongs to this namespace */
	if ('\0' == *ns->name) {
		spec = getenv("LIBLOG");
	} else {
		char *var;
		if (asprintf(&var, "LIBLOG_%s", ns->name) == -1) {
			return (-1);
		}
		spec = getenv(var);
		free(var);
	}
	if (NULL == spec) {
		/* nothing to inherit */
		return (-1);
	}

	/* leading number is the logging level */
	ns->level = atoi(spec);

	/* everything after the first comma is the URI */
	comma = strchr(spec, ',');
	if (NULL == comma) {
		/* only the level was given */
		return (-1);
	}
	if (!url_parse(comma + 1, &target)) {
		return (-1);
	}

	/* try to initialize logger */
	rc = ll_logger_open(target, ns);
	url_free(target);
	return (rc);
}
bool res_retrieve_file (const char *url, char **file, struct iri *iri) { struct iri *i = iri_new (); uerr_t err; char *robots_url = uri_merge (url, RES_SPECS_LOCATION); int saved_ts_val = opt.timestamping; int saved_sp_val = opt.spider, url_err; struct url * url_parsed; /* Copy server URI encoding for a possible IDNA transformation, no need to encode the full URI in UTF-8 because "robots.txt" is plain ASCII */ set_uri_encoding (i, iri->uri_encoding, false); i->utf8_encode = false; logputs (LOG_VERBOSE, _("Loading robots.txt; please ignore errors.\n")); *file = NULL; opt.timestamping = false; opt.spider = false; url_parsed = url_parse (robots_url, &url_err, i, true); if (!url_parsed) { char *error = url_error (robots_url, url_err); logprintf (LOG_NOTQUIET, "%s: %s.\n", robots_url, error); xfree (error); err = URLERROR; } else { err = retrieve_url (url_parsed, robots_url, file, NULL, NULL, NULL, false, i, false); url_free(url_parsed); } opt.timestamping = saved_ts_val; opt.spider = saved_sp_val; xfree (robots_url); iri_free (i); if (err != RETROK && *file != NULL) { /* If the file is not retrieved correctly, but retrieve_url allocated the file name, deallocate is here so that the caller doesn't have to worry about it. */ xfree (*file); *file = NULL; } return err == RETROK; }
static int hls_server_onhttp(void* http, void* session, const char* method, const char* path) { // decode request uri void* url = url_parse(path); std::string s = url_getpath(url); url_free(url); path = s.c_str(); if (0 == strncmp(path, "/live/", 6)) { std::vector<std::string> paths; Split(path + 6, "/", paths); if (strendswith(path, ".m3u8") && 1 == paths.size()) { std::string app = paths[0].substr(0, paths[0].length() - 5); if (s_playlists.find(app) == s_playlists.end()) { hls_playlist_t* playlist = new hls_playlist_t(); playlist->file = app; playlist->m3u8 = hls_m3u8_create(HLS_LIVE_NUM); playlist->hls = hls_media_create(HLS_DURATION * 1000, hls_handler, playlist); playlist->i = 0; s_playlists[app] = playlist; thread_create(&playlist->t, hls_server_worker, playlist); } return hls_server_m3u8(session, app); } else if (strendswith(path, ".ts") && 2 == paths.size()) { if (s_playlists.find(paths[0]) != s_playlists.end()) { return hls_server_ts(session, paths[0], paths[1]); } } } else if (0 == strncmp(path, "/vod/", 5)) { if (path_testfile(path+5)) { return hls_server_reply_file(session, path + 5); } } return hls_server_reply(session, 404, ""); }
// Post a confirmation for prescription `pres_no` to `url`.
//
// The request body is "pres_no=<pres_no>&post_type=<post_type>" and is sent
// with httpPost().  Returns TRUE when httpPost() reports ERROR_SUCCESS and
// FALSE when `pres_no` is NULL, `url` cannot be parsed or the post fails.
//
// The body buffer is sized from the actual length of `pres_no`; the former
// fixed 100-byte stack buffer overflowed for long prescription numbers.
HISINFOPOST_API BOOL PASCAL confirmPres(const char *url, const char *pres_no, int post_type)
{
	if (!pres_no)
		return FALSE;

	url_schema *urls = url_parse(url);
	if (!urls)
		return FALSE;

	// "pres_no=" (8) + "&post_type=" (11) + decimal int (at most 11)
	// + terminating NUL (1) fit comfortably in the 64 spare bytes.
	std::vector<char> data(strlen(pres_no) + 64, '\0');
	sprintf(&data[0], "pres_no=%s&post_type=%d", pres_no, post_type);

	static TCHAR hdrs[] = _T("Content-Type: text/html");

	BOOL result = TRUE;
	if (ERROR_SUCCESS != httpPost(urls, hdrs, &data[0]))
		result = FALSE;

	url_free(urls);
	return result;
}
// Turn a "file:" URL into its raw-decoded path.
//
// Returns null_string when the input does not parse as a URL, when its
// scheme is not "file" (compared case-insensitively), or when it names a
// host other than "localhost".  An empty host is accepted.
String StringUtil::DecodeFileUrl(const String& input) {
  static const char kFileScheme[] = "file";
  static const char kLocalHost[] = "localhost";

  Url parts;
  if (!url_parse(parts, input.data(), input.size())) {
    return null_string;
  }

  const bool isFileScheme =
    bstrcasecmp(parts.scheme.data(), parts.scheme.size(),
                kFileScheme, sizeof(kFileScheme) - 1) == 0;
  if (!isFileScheme) {
    return null_string;
  }

  if (parts.host.size() > 0) {
    const bool isLocal =
      bstrcasecmp(parts.host.data(), parts.host.size(),
                  kLocalHost, sizeof(kLocalHost) - 1) == 0;
    if (!isLocal) {
      return null_string;
    }
  }

  return url_raw_decode(parts.path.data(), parts.path.size());
}
// parse_url(): split `url` into its components.
//
// Returns false when the URL cannot be parsed.  With a negative
// `component` the result is an array of the components, filled in through
// SET_COMPONENT plus "port" when the port is non-zero.  Otherwise only the
// requested component is returned via RETURN_COMPONENT (or the port
// number); when control falls through to the end of the switch, null is
// returned.  An unknown component identifier raises a warning and yields
// false.
//
// The locals `resource` and `ret` keep their names because the
// RETURN_COMPONENT / SET_COMPONENT macros are expanded around them.
Variant HHVM_FUNCTION(parse_url, const String& url,
                      int64_t component /* = -1 */) {
  Url resource;
  if (!url_parse(resource, url.data(), url.size())) {
    return false;
  }

  if (component <= -1) {
    // Whole-URL form: one slot per component, one more if there is a port.
    ArrayInit ret(resource.port ? 8 : 7, ArrayInit::Map{});
    SET_COMPONENT(scheme);
    SET_COMPONENT(host);
    if (resource.port) {
      ret.set(s_port, (int64_t)resource.port);
    }
    SET_COMPONENT(user);
    SET_COMPONENT(pass);
    SET_COMPONENT(path);
    SET_COMPONENT(query);
    SET_COMPONENT(fragment);
    return ret.toVariant();
  }

  switch (component) {
    case k_PHP_URL_SCHEME:   RETURN_COMPONENT(scheme);   break;
    case k_PHP_URL_HOST:     RETURN_COMPONENT(host);     break;
    case k_PHP_URL_USER:     RETURN_COMPONENT(user);     break;
    case k_PHP_URL_PASS:     RETURN_COMPONENT(pass);     break;
    case k_PHP_URL_PATH:     RETURN_COMPONENT(path);     break;
    case k_PHP_URL_QUERY:    RETURN_COMPONENT(query);    break;
    case k_PHP_URL_FRAGMENT: RETURN_COMPONENT(fragment); break;
    case k_PHP_URL_PORT:
      if (resource.port) {
        return resource.port;
      }
      break;
    default:
      raise_warning(
        "parse_url(): Invalid URL component identifier %" PRId64, component);
      return false;
  }
  return init_null();
}
// Map `source` to the file path libxml should open.
//
// When `source` parses as a URL with the "file" scheme the result is the
// URL-decoded path.  When the parsed scheme is empty and `source` either
// did not parse as a URL or was a file URL, the current value is passed
// through File::TranslatePath().  In every other case `source` is returned
// unchanged.
String libxml_get_valid_file_path(const String& source) {
  String file_dest(source);

  Url url;
  const bool parsedAsUri =
    url_parse(url, file_dest.data(), file_dest.size());
  const bool hasFileScheme = parsedAsUri && url.scheme.same(s_file);

  if (hasFileScheme) {
    file_dest = StringUtil::UrlDecode(url.path, false);
  }

  const bool needsTranslation =
    url.scheme.empty() && (!parsedAsUri || hasFileScheme);
  if (needsTranslation) {
    file_dest = File::TranslatePath(file_dest);
  }

  return file_dest;
}
/* {{{ proto array parse_url(string url) Parse a URL and return its components */ void php3_parse_url(INTERNAL_FUNCTION_PARAMETERS) { pval *string; url *resource; TLS_VARS; if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &string) == FAILURE) { WRONG_PARAM_COUNT; } convert_to_string(string); resource = url_parse(string->value.str.val); if (resource == NULL) { php3_error(E_WARNING, "unable to parse url (%s)", string->value.str.val); RETURN_FALSE; } /* allocate an array for return */ if (array_init(return_value) == FAILURE) { free_url(resource); RETURN_FALSE; } /* add the various elements to the array */ if (resource->scheme != NULL) add_assoc_string(return_value, "scheme", resource->scheme, 1); if (resource->host != NULL) add_assoc_string(return_value, "host", resource->host, 1); if (resource->port != 0) add_assoc_long(return_value, "port", resource->port); if (resource->user != NULL) add_assoc_string(return_value, "user", resource->user, 1); if (resource->pass != NULL) add_assoc_string(return_value, "pass", resource->pass, 1); if (resource->path != NULL) add_assoc_string(return_value, "path", resource->path, 1); if (resource->query != NULL) add_assoc_string(return_value, "query", resource->query, 1); if (resource->fragment != NULL) add_assoc_string(return_value, "fragment", resource->fragment, 1); free_url(resource); }
// parse_url(): split `url` into its components.
//
// An unparsable URL raises a notice and yields false.  With a negative
// `component` the result is an array filled in through SET_COMPONENT, plus
// "port" when the port is non-zero.  Otherwise only the requested component
// is returned via RETURN_COMPONENT (or the port number); when control falls
// through to the end of the switch, null is returned.  An unknown component
// is reported through throw_invalid_argument().
//
// The locals `resource` and `ret` keep their names because the
// RETURN_COMPONENT / SET_COMPONENT macros are expanded around them.
Variant f_parse_url(CStrRef url, int component /* = -1 */) {
  Url resource;
  if (!url_parse(resource, url.data(), url.size())) {
    raise_notice("invalid url: %s", url.data());
    return false;
  }

  if (component <= -1) {
    Array ret;
    SET_COMPONENT(scheme);
    SET_COMPONENT(host);
    SET_COMPONENT(user);
    SET_COMPONENT(pass);
    SET_COMPONENT(path);
    SET_COMPONENT(query);
    SET_COMPONENT(fragment);
    if (resource.port) {
      ret.set("port", (int64)resource.port);
    }
    return ret;
  }

  switch (component) {
    case PHP_URL_SCHEME:   RETURN_COMPONENT(scheme);   break;
    case PHP_URL_HOST:     RETURN_COMPONENT(host);     break;
    case PHP_URL_USER:     RETURN_COMPONENT(user);     break;
    case PHP_URL_PASS:     RETURN_COMPONENT(pass);     break;
    case PHP_URL_PATH:     RETURN_COMPONENT(path);     break;
    case PHP_URL_QUERY:    RETURN_COMPONENT(query);    break;
    case PHP_URL_FRAGMENT: RETURN_COMPONENT(fragment); break;
    case PHP_URL_PORT:
      if (resource.port) {
        return resource.port;
      }
      break;
    default:
      throw_invalid_argument("component: %d", component);
      return false;
  }
  return null;
}
/*
 * URL handler for a CLI variable.
 *
 * With 'out' set, the current value of cliptr is printed and CLI_CONT is
 * returned.  Otherwise valstr is checked and stored: it must parse as a
 * URL, must not use https, must name its host in dotted-decimal form and
 * must fit (including the terminating NUL) in cliptr->varmax bytes.
 * CLI_SET is returned when the value was stored, CLI_FAIL when any check
 * failed.
 */
static int
clurl(cli_ent_t *cliptr, char *valstr, boolean_t out)
{
	url_t	parsed;
	uint_t	vlen;

	if (out) {
		printf("%s", (char *)cliptr->varptr);
		return (CLI_CONT);
	}

	/* The checks run in this order and stop at the first failure. */
	if (url_parse(valstr, &parsed) != URL_PARSE_SUCCESS) {
		return (CLI_FAIL);
	}
	if (parsed.https) {
		return (CLI_FAIL);
	}
	if (inet_addr(parsed.hport.hostname) == (in_addr_t)-1) {
		return (CLI_FAIL);
	}
	vlen = strlen(valstr);
	if (vlen >= cliptr->varmax) {
		return (CLI_FAIL);
	}

	(void) strcpy(cliptr->varptr, valstr);
	cliptr->varlen = vlen + 1;
	return (CLI_SET);
}
/*
 * Copy url_string into urlbuf's own storage and parse it in place.
 *
 * urlbuf is cleared first.  The string, including its terminating NUL,
 * must fit in urlbuf->buf; a string that does not fit is rejected rather
 * than truncated.
 *
 * Returns 0 on success, -1 if the url is too long, 1 if url_parse()
 * rejects it.
 */
int urlbuf_parse (struct urlbuf *urlbuf, const char *url_string)
{
    size_t len;

    bzero(urlbuf, sizeof(*urlbuf));

    len = strlen(url_string);

    /* '>=' rather than '>': a string of exactly sizeof(buf) characters
     * leaves no room for the NUL and used to lose its last character. */
    if (len >= sizeof(urlbuf->buf)) {
        log_error("url is too long: %zu", len);
        return -1;
    }

    /* The length is known to fit, so copy the string together with its NUL. */
    memcpy(urlbuf->buf, url_string, len + 1);

    log_debug("parse: %s", urlbuf->buf);

    if (url_parse(&urlbuf->url, urlbuf->buf)) {
        log_warning("invalid url: %s", url_string);
        return 1;
    }

    return 0;
}
// parse_url(): split `url` into its components.
//
// Returns false when the URL cannot be parsed.  With a negative
// `component` the result is an array filled in through SET_COMPONENT, plus
// "port" when the port is non-zero.  Otherwise only the requested component
// is returned via RETURN_COMPONENT (or the port number); when control falls
// through to the end of the switch, an uninitialized null is returned.  An
// unknown component is reported through throw_invalid_argument().
//
// The locals `resource` and `ret` keep their names because the
// RETURN_COMPONENT / SET_COMPONENT macros are expanded around them.
Variant f_parse_url(const String& url, int component /* = -1 */) {
  Url resource;
  if (!url_parse(resource, url.data(), url.size())) {
    return false;
  }

  if (component <= -1) {
    ArrayInit ret(8);
    SET_COMPONENT(scheme);
    SET_COMPONENT(host);
    if (resource.port) {
      ret.set(s_port, (int64_t)resource.port);
    }
    SET_COMPONENT(user);
    SET_COMPONENT(pass);
    SET_COMPONENT(path);
    SET_COMPONENT(query);
    SET_COMPONENT(fragment);
    return ret.create();
  }

  switch (component) {
    case k_PHP_URL_SCHEME:   RETURN_COMPONENT(scheme);   break;
    case k_PHP_URL_HOST:     RETURN_COMPONENT(host);     break;
    case k_PHP_URL_USER:     RETURN_COMPONENT(user);     break;
    case k_PHP_URL_PASS:     RETURN_COMPONENT(pass);     break;
    case k_PHP_URL_PATH:     RETURN_COMPONENT(path);     break;
    case k_PHP_URL_QUERY:    RETURN_COMPONENT(query);    break;
    case k_PHP_URL_FRAGMENT: RETURN_COMPONENT(fragment); break;
    case k_PHP_URL_PORT:
      if (resource.port) {
        return resource.port;
      }
      break;
    default:
      throw_invalid_argument("component: %d", component);
      return false;
  }
  return uninit_null();
}
/* ** If the "proxy" setting is defined, then change the URL settings ** (initialized by a prior call to url_parse()) so that the HTTP ** header will be appropriate for the proxy and so that the TCP/IP ** connection will be opened to the proxy rather than to the server. ** ** If zMsg is not NULL and a proxy is used, then print zMsg followed ** by the canonical name of the proxy (with userid and password suppressed). */ void url_enable_proxy(const char *zMsg){ const char *zProxy; zProxy = zProxyOpt; if( zProxy==0 ){ zProxy = db_get("proxy", 0); if( zProxy==0 || zProxy[0]==0 || is_false(zProxy) ){ zProxy = fossil_getenv("http_proxy"); } } if( zProxy && zProxy[0] && !is_false(zProxy) && !g.url.isSsh && !g.url.isFile ){ char *zOriginalUrl = g.url.canonical; char *zOriginalHost = g.url.hostname; int fOriginalIsHttps = g.url.isHttps; char *zOriginalUser = g.url.user; char *zOriginalPasswd = g.url.passwd; char *zOriginalUrlPath = g.url.path; int iOriginalPort = g.url.port; unsigned uOriginalFlags = g.url.flags; g.url.user = 0; g.url.passwd = ""; url_parse(zProxy, 0); if( zMsg ) fossil_print("%s%s\n", zMsg, g.url.canonical); g.url.path = zOriginalUrl; g.url.hostname = zOriginalHost; if( g.url.user ){ char *zCredentials1 = mprintf("%s:%s", g.url.user, g.url.passwd); char *zCredentials2 = encode64(zCredentials1, -1); g.url.proxyAuth = mprintf("Basic %z", zCredentials2); free(zCredentials1); } g.url.user = zOriginalUser; g.url.passwd = zOriginalPasswd; g.url.isHttps = fOriginalIsHttps; g.url.useProxy = 1; g.url.proxyUrlPath = zOriginalUrlPath; g.url.proxyOrigPort = iOriginalPort; g.url.flags = uOriginalFlags; } }
// Ask the user for confirmation and, if confirmed, post a prescription.
//
// `url` is parsed first; ERROR_URL_INVALID is returned when that fails.
// A modal dialog (IDD_DIALOG1) is then shown on top of the foreground
// window.  When it ends with ACTION_COMMIT the body
// "post_type=<n>&encoding=<encoding>&data=<xml>" is sent with httpPost();
// a failed post yields ERROR_POST_FAILED.  In every other case the value
// returned by the dialog is returned unchanged.
//
// A NULL `xml` or `encoding` is sent as an empty string.
HISINFOPOST_API int PASCAL postPres(const char *url, const char *xml, int post_type, const char *encoding)
{
	int rc = 0;
	url_schema *urls = url_parse(url);
	if (!urls)
		return ERROR_URL_INVALID;

	HWND hParent = ::GetForegroundWindow();
	rc = DialogBox(hInstance, MAKEINTRESOURCE(IDD_DIALOG1), hParent, DlgProc);
	if (rc == ACTION_COMMIT) {
		const char *body = xml ? xml : "";
		const char *enc = encoding ? encoding : "";

		// The buffer must really exist before it is written to: reserve()
		// on an empty vector does not make &data[0] valid.  Its size also
		// has to cover the encoding name, not only the xml.  The fixed
		// parts ("post_type=", "&encoding=", "&data=", a decimal int and
		// the terminating NUL) need at most 38 bytes; 64 leaves headroom.
		std::vector<char> data(strlen(body) + strlen(enc) + 64, '\0');
		char *p = &data[0];
		sprintf(p, "post_type=%d&encoding=%s&data=%s", post_type, enc, body);

		static TCHAR hdrs[] = _T("Content-Type: application/xml");
		if (ERROR_SUCCESS != httpPost(urls, hdrs, p))
			rc = ERROR_POST_FAILED;
	}

	url_free(urls);
	return rc;
}
static void convert_links_in_hashtable (struct hash_table *downloaded_set, int is_css, int *file_count) { int i; int cnt; char **file_array; cnt = 0; if (downloaded_set) cnt = hash_table_count (downloaded_set); if (cnt == 0) return; file_array = alloca_array (char *, cnt); string_set_to_array (downloaded_set, file_array); for (i = 0; i < cnt; i++) { struct urlpos *urls, *cur_url; char *url; char *file = file_array[i]; /* Determine the URL of the file. get_urls_{html,css} will need it. */ url = hash_table_get (dl_file_url_map, file); if (!url) { DEBUGP (("Apparently %s has been removed.\n", file)); continue; } DEBUGP (("Scanning %s (from %s)\n", file, url)); /* Parse the file... */ urls = is_css ? get_urls_css_file (file, url) : get_urls_html (file, url, NULL, NULL); /* We don't respect meta_disallow_follow here because, even if the file is not followed, we might still want to convert the links that have been followed from other files. */ for (cur_url = urls; cur_url; cur_url = cur_url->next) { char *local_name; struct url *u; struct iri *pi; if (cur_url->link_base_p) { /* Base references have been resolved by our parser, so we turn the base URL into an empty string. (Perhaps we should remove the tag entirely?) */ cur_url->convert = CO_NULLIFY_BASE; continue; } /* We decide the direction of conversion according to whether a URL was downloaded. Downloaded URLs will be converted ABS2REL, whereas non-downloaded will be converted REL2ABS. */ pi = iri_new (); set_uri_encoding (pi, opt.locale, true); u = url_parse (cur_url->url->url, NULL, pi, true); if (!u) continue; local_name = hash_table_get (dl_url_file_map, u->url); /* Decide on the conversion type. */ if (local_name) { /* We've downloaded this URL. Convert it to relative form. We do this even if the URL already is in relative form, because our directory structure may not be identical to that on the server (think `-nd', `--cut-dirs', etc.) 
*/ cur_url->convert = CO_CONVERT_TO_RELATIVE; cur_url->local_name = xstrdup (local_name); DEBUGP (("will convert url %s to local %s\n", u->url, local_name)); } else { /* We haven't downloaded this URL. If it's not already complete (including a full host name), convert it to that form, so it can be reached while browsing this HTML locally. */ if (!cur_url->link_complete_p) cur_url->convert = CO_CONVERT_TO_COMPLETE; cur_url->local_name = NULL; DEBUGP (("will convert url %s to complete\n", u->url)); } url_free (u); iri_free (pi); } /* Convert the links in the file. */ convert_links (file, urls); ++*file_count; /* Free the data. */ free_urlpos (urls); } }
/*
 * Entry point of the FastCGI web front end.
 *
 * Start-up: switch to the BBS uid/gid, set the core-dump limit, seed the
 * random generator, attach the shared WWW cache (exit 0 if that fails),
 * publish this binary's file time as www_version if it is newer, change to
 * BBSHOME, initialise shared memory and the user mapping, install the
 * SIGTERM handler and check for the NOLOGIN file.
 *
 * Request loop: for every request accepted by FCGI_Accept() the client
 * address is recorded, url_parse() and http_parm_init() set up the request,
 * and the applet returned by get_cgi_applet() is run.
 *
 * The process exits with 2 or 4 when it is asked to quit (incgiloop
 * cleared), when a newer binary version has been published in the cache,
 * or (setjmp path only) after more than 40000 passes counted in rt; it
 * exits with 5 when FCGI_Accept() stops returning requests.
 */
int main(int argc, char *argv[])
{
	struct cgi_applet *a = NULL;
	struct rlimit rl;
	int i;
	seteuid(BBSUID);
	setuid(BBSUID);
	setgid(BBSGID);
	cgi_time(NULL);
	/* core files: 20 MB soft limit, 40 MB hard limit */
	rl.rlim_cur = 20 * 1024 * 1024;
	rl.rlim_max = 40 * 1024 * 1024;
	setrlimit(RLIMIT_CORE, &rl);
	thispid = getpid();
	now_t = time(NULL);
	srand(now_t * 2 + thispid);
	wwwcache = get_shm(WWWCACHE_SHMKEY, sizeof (struct WWWCACHE));
	if (NULL == wwwcache)
		exit(0);
	/* publish our own version so that older processes notice and exit */
	thisversion = file_time(argv[0]);
	if (thisversion > wwwcache->www_version)
		wwwcache->www_version = thisversion;
	html_header(0);
	if (geteuid() != BBSUID)
		http_fatal("uid error.");
	chdir(BBSHOME);
	shm_init();
	if (ummap())
		http_fatal("mmap error.");
	signal(SIGTERM, wantquit);
	/* access() returns non-zero when the NOLOGIN file is absent */
	if (access("NOLOGIN", F_OK))
		nologin = 0;
	get_att_server();
	while (FCGI_Accept() >= 0) {
		// start_outcache();
		cginame = NULL;
		incgiloop = 1;
		/*
		 * setjmp() returns non-zero when control comes back here through
		 * a longjmp to cgi_start.
		 * NOTE(review): presumably http_fatal() is what jumps here --
		 * confirm against its definition.
		 */
		if (setjmp(cgi_start)) {
			// end_outcache();
			cgi_time(a);
			if (!incgiloop || wwwcache->www_version > thisversion
			    || rt++ > 40000) {
				logtimeused();
				exit(2);
			}
			incgiloop = 0;
			continue;
		}
		html_header(0);
		now_t = time(NULL);
		via_proxy = 0;
		strsncpy(fromhost, getsenv("REMOTE_ADDR"), 46);	//ipv6 by leoncom
		inet_pton(PF_INET6,fromhost,&from_addr);
		//inet_aton(fromhost, &from_addr);
		/* ipv6 by leoncom: ignore validproxy
		   for (i = 0; wwwcache->validproxy[i] && i < MAX_PROXY_NUM; i++) {
		   if (from_addr.s_addr == wwwcache->validproxy[i]) {
		   via_proxy = 1;
		   break;
		   }
		   }
		   if (via_proxy) {
		   char *ptr, *p;
		   int IPLEN = 255;
		   ptr = getenv("HTTP_X_FORWARDED_FOR");
		   if (!ptr)
		   ptr = getsenv("REMOTE_ADDR");
		   p = strrchr(ptr, ',');
		   if (p != NULL) {
		   while (!isdigit(*p) && *p)
		   p++;
		   if (*p)
		   strncpy(fromhost, p, IPLEN);
		   else
		   strncpy(fromhost, ptr, IPLEN);
		   } else
		   strncpy(fromhost, ptr, IPLEN);
		   fromhost[IPLEN] = 0;
		   inet_aton(fromhost, &from_addr);
		   }
		 */
		if (url_parse())
			http_fatal("%s 没有实现的功能!", getsenv("SCRIPT_URL"));
		http_parm_init();
		a = get_cgi_applet(needcgi);
		if (a != NULL) {
			cginame = a->name[0];
			//access(getsenv("QUERY_STRING"), F_OK);
			wwwcache->www_visit++;
			(*(a->main)) ();
			// end_outcache();
			cgi_time(a);
			if (!incgiloop || wwwcache->www_version > thisversion) {
				logtimeused();
				exit(4);
			}
			incgiloop = 0;
			continue;
		}
		/* no applet is registered for the requested name */
		http_fatal("%s 没有实现的功能!", getsenv("SCRIPT_URL"));
		// end_outcache();
		incgiloop = 0;
	}
	munmap(ummap_ptr, ummap_size);
	exit(5);
}
uerr_t retrieve_from_file (const char *file, bool html, int *count) { uerr_t status; struct urlpos *url_list, *cur_url; struct iri *iri = iri_new(); char *input_file, *url_file = NULL; const char *url = file; status = RETROK; /* Suppose everything is OK. */ *count = 0; /* Reset the URL count. */ /* sXXXav : Assume filename and links in the file are in the locale */ set_uri_encoding (iri, opt.locale, true); set_content_encoding (iri, opt.locale); if (url_valid_scheme (url)) { int dt,url_err; uerr_t status; struct url *url_parsed = url_parse (url, &url_err, iri, true); if (!url_parsed) { char *error = url_error (url, url_err); logprintf (LOG_NOTQUIET, "%s: %s.\n", url, error); xfree (error); return URLERROR; } if (!opt.base_href) opt.base_href = xstrdup (url); status = retrieve_url (url_parsed, url, &url_file, NULL, NULL, &dt, false, iri, true); url_free (url_parsed); if (!url_file || (status != RETROK)) return status; if (dt & TEXTHTML) html = true; /* If we have a found a content encoding, use it. * ( == is okay, because we're checking for identical object) */ if (iri->content_encoding != opt.locale) set_uri_encoding (iri, iri->content_encoding, false); /* Reset UTF-8 encode status */ iri->utf8_encode = opt.enable_iri; xfree_null (iri->orig_url); iri->orig_url = NULL; input_file = url_file; } else input_file = (char *) file; url_list = (html ? 
get_urls_html (input_file, NULL, NULL, iri) : get_urls_file (input_file)); xfree_null (url_file); for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count) { char *filename = NULL, *new_file = NULL; int dt; struct iri *tmpiri = iri_dup (iri); struct url *parsed_url = NULL; if (cur_url->ignore_when_downloading) continue; if (opt.quota && total_downloaded_bytes > opt.quota) { status = QUOTEXC; break; } parsed_url = url_parse (cur_url->url->url, NULL, tmpiri, true); if ((opt.recursive || opt.page_requisites) && (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url))) { int old_follow_ftp = opt.follow_ftp; /* Turn opt.follow_ftp on in case of recursive FTP retrieval */ if (cur_url->url->scheme == SCHEME_FTP) opt.follow_ftp = 1; status = retrieve_tree (parsed_url ? parsed_url : cur_url->url, tmpiri); opt.follow_ftp = old_follow_ftp; } else status = retrieve_url (parsed_url ? parsed_url : cur_url->url, cur_url->url->url, &filename, &new_file, NULL, &dt, opt.recursive, tmpiri, true); if (parsed_url) url_free (parsed_url); if (filename && opt.delete_after && file_exists_p (filename)) { DEBUGP (("\ Removing file due to --delete-after in retrieve_from_file():\n")); logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename); if (unlink (filename)) logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno)); dt &= ~RETROKF; } xfree_null (new_file); xfree_null (filename); iri_free (tmpiri); }
uerr_t retrieve_url (struct url * orig_parsed, const char *origurl, char **file, char **newloc, const char *refurl, int *dt, bool recursive, struct iri *iri, bool register_status) { uerr_t result; char *url; bool location_changed; bool iri_fallbacked = 0; int dummy; char *mynewloc, *proxy; struct url *u = orig_parsed, *proxy_url; int up_error_code; /* url parse error code */ char *local_file; int redirection_count = 0; bool post_data_suspended = false; char *saved_post_data = NULL; char *saved_post_file_name = NULL; /* If dt is NULL, use local storage. */ if (!dt) { dt = &dummy; dummy = 0; } url = xstrdup (origurl); if (newloc) *newloc = NULL; if (file) *file = NULL; if (!refurl) refurl = opt.referer; redirected: /* (also for IRI fallbacking) */ result = NOCONERROR; mynewloc = NULL; local_file = NULL; proxy_url = NULL; proxy = getproxy (u); if (proxy) { struct iri *pi = iri_new (); set_uri_encoding (pi, opt.locale, true); pi->utf8_encode = false; /* Parse the proxy URL. */ proxy_url = url_parse (proxy, &up_error_code, NULL, true); if (!proxy_url) { char *error = url_error (proxy, up_error_code); logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"), proxy, error); xfree (url); xfree (error); RESTORE_POST_DATA; result = PROXERR; goto bail; } if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme) { logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy); url_free (proxy_url); xfree (url); RESTORE_POST_DATA; result = PROXERR; goto bail; } } if (u->scheme == SCHEME_HTTP #ifdef HAVE_SSL || u->scheme == SCHEME_HTTPS #endif || (proxy_url && proxy_url->scheme == SCHEME_HTTP)) { result = http_loop (u, orig_parsed, &mynewloc, &local_file, refurl, dt, proxy_url, iri); } else if (u->scheme == SCHEME_FTP) { /* If this is a redirection, temporarily turn off opt.ftp_glob and opt.recursive, both being undesirable when following redirects. 
*/ bool oldrec = recursive, glob = opt.ftp_glob; if (redirection_count) oldrec = glob = false; result = ftp_loop (u, &local_file, dt, proxy_url, recursive, glob); recursive = oldrec; /* There is a possibility of having HTTP being redirected to FTP. In these cases we must decide whether the text is HTML according to the suffix. The HTML suffixes are `.html', `.htm' and a few others, case-insensitive. */ if (redirection_count && local_file && u->scheme == SCHEME_FTP) { if (has_html_suffix_p (local_file)) *dt |= TEXTHTML; } } if (proxy_url) { url_free (proxy_url); proxy_url = NULL; } location_changed = (result == NEWLOCATION || result == NEWLOCATION_KEEP_POST); if (location_changed) { char *construced_newloc; struct url *newloc_parsed; assert (mynewloc != NULL); if (local_file) xfree (local_file); /* The HTTP specs only allow absolute URLs to appear in redirects, but a ton of boneheaded webservers and CGIs out there break the rules and use relative URLs, and popular browsers are lenient about this, so wget should be too. */ construced_newloc = uri_merge (url, mynewloc); xfree (mynewloc); mynewloc = construced_newloc; /* Reset UTF-8 encoding state, keep the URI encoding and reset the content encoding. */ iri->utf8_encode = opt.enable_iri; set_content_encoding (iri, NULL); xfree_null (iri->orig_url); iri->orig_url = NULL; /* Now, see if this new location makes sense. */ newloc_parsed = url_parse (mynewloc, &up_error_code, iri, true); if (!newloc_parsed) { char *error = url_error (mynewloc, up_error_code); logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc), error); if (orig_parsed != u) { url_free (u); } xfree (url); xfree (mynewloc); xfree (error); RESTORE_POST_DATA; goto bail; } /* Now mynewloc will become newloc_parsed->url, because if the Location contained relative paths like .././something, we don't want that propagating as url. */ xfree (mynewloc); mynewloc = xstrdup (newloc_parsed->url); /* Check for max. number of redirections. 
*/ if (++redirection_count > opt.max_redirect) { logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"), opt.max_redirect); url_free (newloc_parsed); if (orig_parsed != u) { url_free (u); } xfree (url); xfree (mynewloc); RESTORE_POST_DATA; result = WRONGCODE; goto bail; } xfree (url); url = mynewloc; if (orig_parsed != u) { url_free (u); } u = newloc_parsed; /* If we're being redirected from POST, and we received a redirect code different than 307, we don't want to POST again. Many requests answer POST with a redirection to an index page; that redirection is clearly a GET. We "suspend" POST data for the duration of the redirections, and restore it when we're done. RFC2616 HTTP/1.1 introduces code 307 Temporary Redirect specifically to preserve the method of the request. */ if (result != NEWLOCATION_KEEP_POST && !post_data_suspended) SUSPEND_POST_DATA; goto redirected; } /* Try to not encode in UTF-8 if fetching failed */ if (!(*dt & RETROKF) && iri->utf8_encode) { iri->utf8_encode = false; if (orig_parsed != u) { url_free (u); } u = url_parse (origurl, NULL, iri, true); if (u) { DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url))); url = xstrdup (u->url); iri_fallbacked = 1; goto redirected; } else DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url))); } if (local_file && u && *dt & RETROKF) { register_download (u->url, local_file); if (!opt.spider && redirection_count && 0 != strcmp (origurl, u->url)) register_redirection (origurl, u->url); if (*dt & TEXTHTML) register_html (local_file); if (*dt & TEXTCSS) register_css (local_file); } if (file) *file = local_file ? local_file : NULL; else xfree_null (local_file); if (orig_parsed != u) { url_free (u); } if (redirection_count || iri_fallbacked) { if (newloc) *newloc = url; else xfree (url); } else { if (newloc) *newloc = NULL; xfree (url); } RESTORE_POST_DATA; bail: if (register_status) inform_exit_status (result); return result; }
/*
 * Fetch the resource named by urlstr and hand it to func piece by piece.
 *
 * A URL whose scheme is "file" is read from disk with fread(); a URL whose
 * scheme is "http" is fetched with an HTTP/1.0 GET over a new TCP socket.
 * Any other scheme is reported as HTTP_REQ_ERROR_BAD_URL.
 *
 * func is invoked as func(&req, data, len, farg); req.req_state tells the
 * callback which phase it is being called for (RESOLVE, CONNECT, SEND, HEAD,
 * BODY, EOF or ERROR).  A non-zero return from func stops the transfer.
 * When something fails, func is called one final time with
 * req.req_state == HTTP_REQ_ERROR and req.req_error set to the cause.
 *
 * Returns the value of the last func invocation.  For a file URL that was
 * opened and read without a read error the result is 0, even when func
 * stopped the read early.
 *
 * NOTE(review): assertb()/assertb_sockerr() are presumed to break out of the
 * enclosing loop when their condition is false -- confirm against their
 * definition.
 * NOTE(review): sock is never closed in this function; unless something
 * outside this function owns it, this looks like a descriptor leak.  Close
 * it with the project's socket-close helper once that is confirmed.
 */
int http_get(char *urlstr, http_req_func_t func, void *farg) {
    url_t url;
    struct sockaddr_in iaddr;
    sockaddrlen_t addrlen;
    int i, err=-1;
    sock_t sock;
    int url_parsed=0;
    char buf[16384], buf1[1024];
    size_t len=0;
    http_header_t http;
    http_req_t req;
    int got_header = 0;
    http_req_error_t req_err = 0;

    do {
        memset(&req, 0, sizeof(req));
        req.req_url = &url;
        req.reply_hdr = &http;

        /* req_err always holds the error to report if the next step fails */
        req_err = HTTP_REQ_ERROR_BAD_URL;
        i = url_parse(&url, urlstr);
        assertb(i==0);
        url_parsed = 1;

        if( strcasecmp(url.proto, "file") == 0 ) {
            struct stat st;
            FILE *f=0;

            req_err = HTTP_REQ_ERROR_FILE_NOT_FOUND;
            do {
                i = stat(url.path, &st);
                assertb(i==0);
                http.content_len = st.st_size;
                req.req_state = HTTP_REQ_BODY;
                f = fopen(url.path, "r");
                assertb(f);

                /* The file is open: from here on only a read error counts. */
                req_err = 0;
                while(1) {
                    len = fread(buf, 1, sizeof(buf), f);
                    if( len == 0 ) {
                        /* fread() returns size_t, so an error cannot be
                           seen as a negative count; ask the stream. */
                        if( ferror(f) ) {
                            req_err = HTTP_REQ_ERROR_INCOMPLETE;
                        }
                        break;
                    }
                    err = func(&req, buf, len, farg);
                    if( err ) break;
                }
                err = 0;
            } while(0);
            if( f ) {
                fclose(f);
            }
            break;
        }

        req_err = HTTP_REQ_ERROR_BAD_URL;
        assertb( strcasecmp(url.proto, "http") == 0 );

        req_err = HTTP_REQ_ERROR_CONNECT;
        sock = socket(AF_INET, SOCK_STREAM, 0);
        assertb_sockerr(sock>=0);

        /* progress message: resolving */
        req.req_state = HTTP_REQ_RESOLVE;
        i = snprintf(buf, sizeof(buf), "resolving host %s\n", url.host);
        err = func(&req, buf, i, farg);
        if( err != 0 ) break;

        addrlen = iaddr_pack(&iaddr, inet_resolve(url.host), url.port);

        /* progress message: connecting */
        req.req_state = HTTP_REQ_CONNECT;
        i = snprintf(buf, sizeof(buf),
                     "connecting to host %s at %s\n"
                     ,url.host
                     ,iaddr_fmt(&iaddr, buf1, sizeof(buf1))
                     );
        err = func(&req, buf, i, farg);
        if( err != 0 ) break;

        i = connect(sock, (struct sockaddr*)&iaddr, addrlen);
        assertb_sockerr(i==0);

        /* build the request and show it to the callback before sending */
        i = snprintf(buf, sizeof(buf),
                     "GET %s HTTP/1.0\r\n"
                     "Host: %s\r\n"
                     "\r\n"
                     ,url.path_args
                     ,url.host
                     );
        req.req_state = HTTP_REQ_SEND;
        err = func(&req, buf, i, farg);
        if( err != 0 ) break;

        i = sock_send_timeout(sock, buf, i, 5000);
        assertb(i>=0);

        len = 0;
        got_header = 0;
        while(1) {
            /* A full buffer here means the header did not fit in buf
               (body data is flushed to the callback on every pass). */
            req_err = HTTP_REQ_ERROR_BAD_RESPONSE;
            assertb( len < sizeof(buf) );
            req_err = 0;

            i = recv(sock, buf+len, sizeof(buf)-len, 0);
            if( i < 0 ) {
                warn_sockerr(sock);
                req_err = HTTP_REQ_ERROR_INCOMPLETE;
                break;
            }
            if( i == 0 ) {
                req.req_state = HTTP_REQ_EOF;
                err = func(&req, 0, 0, farg);
                break;
            }
            len += i;

            if( !got_header ) {
                /* keep accumulating until the header parses completely */
                http_header_init(&http, 0);
                i = http_header_parse(&http, buf, len);
                if( i < 0 ) {
                    req_err = HTTP_REQ_ERROR_BAD_RESPONSE;
                    break;
                }
                if( i == 0 ) {
                    continue;
                }
                got_header = 1;
                req.reply_max = http.content_len;
                req.req_state = HTTP_REQ_HEAD;
                err = func(&req, buf, http.header_len, farg);
                if( err != 0 ) {
                    break;
                }
                /* shift any body bytes that arrived with the header
                   down to the start of buf */
                len -= http.header_len;
                if( len > 0 ) {
                    memmove(buf, buf+http.header_len, len);
                }
            }

            if( got_header ) {
                req.reply_len += len;
                req.req_state = HTTP_REQ_BODY;
                err = func(&req, buf, len, farg);
                len = 0;
                if( err ) {
                    break;
                }
            }
        }
        /* req_err is deliberately NOT cleared here: a failure recorded
           inside the receive loop must reach the error callback below. */
    } while(0);

    if( got_header && http.response_code != 200 ) {
        req_err = HTTP_REQ_ERROR_FILE_NOT_FOUND;
    }

    if( req_err ) {
        req.req_state = HTTP_REQ_ERROR;
        req.req_error = req_err;
        err = func(&req, buf, len, farg);
    }

    if( url_parsed ) {
        url_free(&url);
    }
    if( got_header ) {
        http_header_free(&http);
    }
    return err;
}