/*
 * Parse the request target of an HTTP request, using the Host header value
 * as the base authority.
 *
 * host_header is parsed first as a URI authority. It is rejected when parsing
 * fails, when anything is left over after the authority, or when it carries a
 * userinfo part. For the asterisk form ("*") a new http_url holding only the
 * Host header's host/port data is allocated from pool. Any other target is
 * handed to http_url_do_parse() with the Host header data as base URL.
 *
 * Returns 0 on success, with target->url and target->format filled in.
 * Returns -1 on failure, with an error message stored in *error_r.
 */
int http_url_request_target_parse(const char *request_target,
				  const char *host_header, pool_t pool,
				  struct http_request_target *target,
				  const char **error_r)
{
	struct http_url_parser ctx;
	struct uri_parser *uparser = &ctx.parser;
	struct uri_authority host_auth;
	struct http_url base_url;

	/* Step 1: validate the Host header as a bare authority. */
	memset(&ctx, '\0', sizeof(ctx));
	uri_parser_init(uparser, pool, host_header);

	if (uri_parse_authority(uparser, &host_auth) <= 0) {
		*error_r = t_strdup_printf("Invalid Host header: %s",
					   uparser->error);
		return -1;
	}
	if (host_auth.enc_userinfo != NULL || uparser->cur != uparser->end) {
		*error_r = "Invalid Host header: Contains invalid character";
		return -1;
	}

	/* Step 2: asterisk form; the URL consists of the Host data only. */
	if (strcmp(request_target, "*") == 0) {
		struct http_url *asterisk_url = p_new(pool, struct http_url, 1);

		asterisk_url->host_name = p_strdup(pool, host_auth.host_literal);
		asterisk_url->host_ip = host_auth.host_ip;
		asterisk_url->have_host_ip = host_auth.have_host_ip;
		asterisk_url->port = host_auth.port;
		asterisk_url->have_port = host_auth.have_port;

		target->url = asterisk_url;
		target->format = HTTP_REQUEST_TARGET_FORMAT_ASTERISK;
		return 0;
	}

	/* Step 3: every other form is parsed with the Host data as base. */
	memset(&base_url, 0, sizeof(base_url));
	base_url.host_name = host_auth.host_literal;
	base_url.host_ip = host_auth.host_ip;
	base_url.have_host_ip = host_auth.have_host_ip;
	base_url.port = host_auth.port;
	base_url.have_port = host_auth.have_port;

	/* Restart the generic URI parser on the request target itself. */
	memset(uparser, '\0', sizeof(*uparser));
	uri_parser_init(uparser, pool, request_target);

	ctx.url = p_new(pool, struct http_url, 1);
	ctx.base = &base_url;
	ctx.flags = 0;
	ctx.request_target = TRUE;
	/* Starting assumption; the parsed format is read back below. */
	ctx.req_format = HTTP_REQUEST_TARGET_FORMAT_ORIGIN;

	if (!http_url_do_parse(&ctx)) {
		*error_r = ctx.parser.error;
		return -1;
	}

	target->url = ctx.url;
	target->format = ctx.req_format;
	return 0;
}
/*
 * Split the proxy URI string proxystr into scheme, authority and path and
 * store the pieces in *uri.
 *
 * The scheme is lowercased. If the scheme is followed by "//", the text up to
 * the next '/', '?', '#' or end of string is passed to uri_parse_authority().
 * Everything after that (path, query and fragment together) is stored
 * verbatim in uri->path.
 *
 * Returns 1 on success. On failure calls uri_free(uri) and returns -1.
 */
static int parse_uri(const char *proxystr, struct uri *uri)
{
    const char *p, *q;

    /* Scheme, section 3.1. */
    p = proxystr;
    /* The <ctype.h> classifiers require a value representable as unsigned
     * char (or EOF); a plain char with the high bit set would be undefined
     * behavior on platforms where char is signed, so cast first. */
    if (!isalpha((unsigned char) *p))
        goto fail;

    q = p;
    while (isalpha((unsigned char) *q) || isdigit((unsigned char) *q)
           || *q == '+' || *q == '-' || *q == '.')
        q++;

    if (*q != ':')
        goto fail;

    uri->scheme = mkstr(p, q);
    /* "An implementation should accept uppercase letters as equivalent to
     * lowercase in scheme names (e.g., allow "HTTP" as well as "http") for the
     * sake of robustness..." */
    lowercase(uri->scheme);

    /* Authority, section 3.2. */
    p = q + 1;
    if (*p == '/' && *(p + 1) == '/') {
        char *authority = NULL;

        p += 2;
        q = p;
        while (!(*q == '/' || *q == '?' || *q == '#' || *q == '\0'))
            q++;

        authority = mkstr(p, q);
        if (uri_parse_authority(authority, uri) < 0) {
            free(authority);
            goto fail;
        }
        free(authority);

        p = q;
    }

    /* Path, section 3.3. We include the query and fragment in the path. The
     * path is also not percent-decoded because we just pass it on to the origin
     * server. */
    q = strchr(p, '\0');
    uri->path = mkstr(p, q);

    return 1;

fail:
    uri_free(uri);
    return -1;
}
/*
 * Parse the URI string uri_s into *uri.
 *
 * *uri is reset with uri_init() first. The scheme (lowercased), an optional
 * "//"-introduced authority, and the remainder of the string (stored verbatim
 * as uri->path) are extracted in that order.
 *
 * Returns 0 on success. On failure calls uri_free(uri) and returns -EINVAL.
 */
int uri_parse(const char *uri_s, struct uri *uri)
{
    const char *scheme_end;
    const char *rest;

    uri_init(uri);

    /* Scheme: a letter followed by letters, digits, '+', '-' or '.',
     * terminated by ':'. */
    if (!is_alpha_char(*uri_s))
        goto fail;
    for (scheme_end = uri_s;
         is_alpha_char(*scheme_end) || is_digit_char(*scheme_end)
         || *scheme_end == '+' || *scheme_end == '-' || *scheme_end == '.';
         scheme_end++)
        ;
    if (*scheme_end != ':')
        goto fail;

    uri->scheme = mkstr(uri_s, scheme_end);
    lowercase(uri->scheme);

    /* Authority: only present when the scheme is followed by "//". */
    rest = scheme_end + 1;
    if (rest[0] == '/' && rest[1] == '/') {
        const char *auth_begin = rest + 2;
        const char *auth_end = auth_begin;
        char *authority;
        int failed;

        /* The authority runs up to the next '/', '?', '#' or the end. */
        while (*auth_end != '/' && *auth_end != '?'
               && *auth_end != '#' && *auth_end != '\0')
            auth_end++;

        authority = mkstr(auth_begin, auth_end);
        failed = uri_parse_authority(uri, authority) != 0;
        free(authority);
        if (failed)
            goto fail;

        rest = auth_end;
    }

    /* Whatever is left is kept as the path. */
    uri->path = mkstr(rest, strchr(rest, '\0'));
    return 0;

fail:
    uri_free(uri);
    return -EINVAL;
}
/*
 * Parse an authority that is introduced by "//".
 *
 * When more than two bytes remain in the parser input and the next two are
 * "//", they are consumed and the result of uri_parse_authority() is
 * returned. Otherwise nothing is consumed and 0 is returned.
 */
int uri_parse_slashslash_authority(struct uri_parser *parser,
				   struct uri_authority *auth)
{
	/* "//" authority */
	if ((parser->end - parser->cur) > 2 &&
	    parser->cur[0] == '/' && parser->cur[1] == '/') {
		parser->cur += 2;
		return uri_parse_authority(parser, auth);
	}
	return 0;
}
/* Parse the URI string uri_s into *uri, following the component layout of
   RFC 3986, section 3. *uri is reset with uri_init() before parsing. If the
   port is still -1 after the authority step, it is replaced by
   scheme_default_port() for the parsed scheme. Returns uri on success; on
   error calls uri_free(uri) and returns NULL. */
struct uri *uri_parse(struct uri *uri, const char *uri_s)
{
    const char *start, *stop;

    uri_init(uri);

    /* Scheme (section 3.1): starts with a letter, continues with letters,
       digits, '+', '-' or '.', and must be terminated by ':'. */
    start = uri_s;
    if (!is_alpha_char(*start))
        goto fail;

    stop = start;
    while (is_alpha_char(*stop) || is_digit_char(*stop)
           || *stop == '+' || *stop == '-' || *stop == '.')
        stop++;

    if (*stop != ':')
        goto fail;

    uri->scheme = mkstr(start, stop);
    /* Scheme names are stored lowercased so "HTTP" and "http" are treated
       the same. */
    lowercase(uri->scheme);

    /* Authority (section 3.2): only present after a leading "//". */
    start = stop + 1;
    if (start[0] == '/' && start[1] == '/') {
        char *authority;
        int ok;

        start += 2;
        stop = start;
        while (*stop != '/' && *stop != '?' && *stop != '#' && *stop != '\0')
            stop++;

        authority = mkstr(start, stop);
        ok = uri_parse_authority(uri, authority) != NULL;
        free(authority);
        if (!ok)
            goto fail;

        start = stop;
    }

    if (uri->port == -1)
        uri->port = scheme_default_port(uri->scheme);

    /* Path (section 3.3): the query and fragment stay part of the path, and
       nothing is percent-decoded, because the string is passed on to the
       origin server as is. */
    stop = strchr(start, '\0');
    uri->path = mkstr(start, stop);

    return uri;

fail:
    uri_free(uri);
    return NULL;
}
static bool http_url_parse_authority(struct http_url_parser *url_parser) { struct uri_parser *parser = &url_parser->parser; struct http_url *url = url_parser->url; struct uri_authority auth; const char *user = NULL, *password = NULL; int ret; if ((ret = uri_parse_authority(parser, &auth)) < 0) return FALSE; if (ret > 0) { if (auth.enc_userinfo != NULL) { const char *p; if ((url_parser->flags & HTTP_URL_ALLOW_USERINFO_PART) == 0) { /* RFC 7230, Section 2.7.1: http URI Scheme A sender MUST NOT generate the userinfo subcomponent (and its "@" delimiter) when an "http" URI reference is generated within a message as a request target or header field value. Before making use of an "http" URI reference received from an untrusted source, a recipient SHOULD parse for userinfo and treat its presence as an error; it is likely being used to obscure the authority for the sake of phishing attacks. */ parser->error = "HTTP URL does not allow `userinfo@' part"; return FALSE; } p = strchr(auth.enc_userinfo, ':'); if (p == NULL) { if (!uri_data_decode(parser, auth.enc_userinfo, NULL, &user)) return FALSE; } else { if (!uri_data_decode(parser, auth.enc_userinfo, p, &user)) return FALSE; if (!uri_data_decode(parser, p+1, NULL, &password)) return FALSE; } } } if (url != NULL) { url->host_name = p_strdup(parser->pool, auth.host_literal); url->host_ip = auth.host_ip; url->have_host_ip = auth.have_host_ip; url->port = auth.port; url->have_port = auth.have_port; url->user = p_strdup(parser->pool, user); url->password = p_strdup(parser->pool, password); } return TRUE; }
int http_parse_request_line(const char *line, struct http_request *request) { const char *p, *q; struct uri *uri; char *uri_s; http_request_init(request); p = line; while (*p == ' ') p++; /* Method (CONNECT, GET, etc.). */ q = p; while (is_token_char(*q)) q++; if (p == q) goto badreq; request->method = mkstr(p, q); /* URI. */ p = q; while (*p == ' ') p++; q = p; while (*q != '\0' && *q != ' ') q++; if (p == q) goto badreq; uri_s = mkstr(p, q); /* RFC 2616, section 5.1.1: The method is case-sensitive. RFC 2616, section 5.1.2: Request-URI = "*" | absoluteURI | abs_path | authority The absoluteURI form is REQUIRED when the request is being made to a proxy... The authority form is only used by the CONNECT method. */ if (strcmp(request->method, "CONNECT") == 0) { uri = uri_parse_authority(&request->uri, uri_s); } else { uri = uri_parse(&request->uri, uri_s); } free(uri_s); if (uri == NULL) /* The URI parsing failed. */ goto badreq; /* Version number. */ p = q; while (*p == ' ') p++; if (*p == '\0') { /* No HTTP/X.X version number indicates version 0.9. */ request->version = HTTP_09; } else { q = parse_http_version(p, &request->version); if (p == q) goto badreq; } return 0; badreq: http_request_free(request); return 400; }