int uri_parse(struct uri *uri, const char *urib) { const char *schemeb; const char *authb; const char *userinfob; const char *hostb; const char *pathb; const char *queryb; const char *fragb; size_t schemel; size_t userinfol; size_t hostl; size_t queryl; size_t fragl; size_t pathl; int port; int haveuserinfo; int havepath; int havequery; int havefragment; assert(uri); assert(urib); memset(uri, 0, sizeof(*uri)); /* First, get the lengths and base addresses of all elements */ schemeb = urib; if (uri_parse_scheme(schemeb, &schemel, &authb) == -1) return -1; if (*authb++ != '/' || *authb++ != '/') return -1; userinfob = authb; haveuserinfo = uria_parse_userinfo(userinfob, &userinfol, &hostb); if (uria_parse_host_port(hostb, &hostb, &hostl, &port, &pathb) == -1) return -1; havepath = uri_parse_path(pathb, &pathl, &queryb); havequery = uri_parse_query(&queryb, &queryl, &fragb); havefragment = uri_parse_fragment(&fragb, &fragl); /* Now centralize the allocations so we don't have to have too much error * handling code */ uri->scheme = xstrndup(schemeb, schemel); uri->authority.port = port; uri->authority.hostname = xstrndup(hostb, hostl); if (haveuserinfo) uri->authority.opt = xstrndup(userinfob, userinfol); if (havepath) uri->path = xstrndup(pathb, pathl); if (havequery) uri->query = xstrndup(queryb, queryl); if (havefragment) uri->fragment = xstrndup(fragb, fragl); return 0; }
/*
 * Parse the URI string `str` into `uri`.
 *
 * Any previous content of `uri` is released with uri_destroy() first.
 * Parsing works on a private heap copy of `str`, advanced through a cursor
 * that the component helpers update. A NULL cursor means no input is left.
 *
 * Components are parsed in order: scheme (skipped when the input starts
 * with '/'), hier-part, then an optional query and an optional fragment.
 *
 * Returns 0 on success, -EINVAL when either argument is NULL, -ENOMEM when
 * the working copy cannot be allocated, the helper's non-zero code when a
 * component fails to parse, or -EBADMSG when input remains after the last
 * component.
 */
int uri_parse(uri_t *uri, const char *str)
{
	char *copy = NULL;
	char *cur = NULL;
	int ret = 0;

	if ((uri == NULL) || (str == NULL)) {
		return -EINVAL;
	}

	uri_destroy(uri);

	copy = strdup(str);
	if (copy == NULL) {
		return -ENOMEM;
	}
	cur = copy;

	if (*copy != '/') {
		/* read scheme */
		ret = uri_parse_scheme(uri, &cur);
		if (ret != 0) {
			goto out;
		}
	}

	/* read hier-part */
	ret = uri_parse_hier_part(uri, &cur);
	if (ret != 0) {
		goto out;
	}

	/*
	 * The delimiter just before the cursor has been overwritten in the
	 * working copy, so the untouched input string is consulted to see
	 * which delimiter it was.
	 * NOTE(review): assumes the helpers always leave a non-NULL cursor
	 * past the start of the copy - confirm.
	 */
	if ((cur != NULL) && (str[(cur - 1) - copy] == '?')) {
		ret = uri_parse_query(uri, &cur);
		if (ret != 0) {
			goto out;
		}
	}

	if ((cur != NULL) && (str[(cur - 1) - copy] == '#')) {
		ret = uri_parse_fragment(uri, &cur);
		if (ret != 0) {
			goto out;
		}
	}

	/*
	 * Leftover input is an error. This must be decided before the
	 * working copy is freed: cur points into that buffer, and a pointer
	 * to freed storage has an indeterminate value.
	 */
	if (cur != NULL) {
		ret = -EBADMSG;
	}

out:
	free(copy);
	return ret;
}
static bool http_url_do_parse(struct http_url_parser *url_parser) { struct uri_parser *parser = &url_parser->parser; struct http_url *url = url_parser->url, *base = url_parser->base; const char *const *path; bool relative = TRUE, have_scheme = FALSE, have_authority = FALSE, have_path = FALSE; int path_relative; const char *part; int ret; /* RFC 7230, Appendix B: http-URI = "http://" authority path-abempty [ "?" query ] [ "#" fragment ] https-URI = "https://" authority path-abempty [ "?" query ] [ "#" fragment ] partial-URI = relative-part [ "?" query ] request-target = origin-form / absolute-form / authority-form / asterisk-form origin-form = absolute-path [ "?" query ] absolute-form = absolute-URI authority-form = authority asterisk-form = "*" ; Not parsed here absolute-path = 1*( "/" segment ) RFC 3986, Appendix A: (implemented in uri-util.h) absolute-URI = scheme ":" hier-part [ "?" query ] hier-part = "//" authority path-abempty / path-absolute / path-rootless / path-empty relative-part = "//" authority path-abempty / path-absolute / path-noscheme / path-empty authority = [ userinfo "@" ] host [ ":" port ] path-abempty = *( "/" segment ) path-absolute = "/" [ segment-nz *( "/" segment ) ] path-noscheme = segment-nz-nc *( "/" segment ) path-rootless = segment-nz *( "/" segment ) path-empty = 0<pchar> segment = *pchar segment-nz = 1*pchar segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) ; non-zero-length segment without any colon ":" query = *( pchar / "/" / "?" ) fragment = *( pchar / "/" / "?" 
) */ /* "http:" / "https:" */ if ((url_parser->flags & HTTP_URL_PARSE_SCHEME_EXTERNAL) == 0) { const char *scheme; if ((ret = uri_parse_scheme(parser, &scheme)) < 0) return FALSE; else if (ret > 0) { if (strcasecmp(scheme, "https") == 0) { if (url != NULL) url->have_ssl = TRUE; } else if (strcasecmp(scheme, "http") != 0) { if (url_parser->request_target) { /* valid as non-HTTP scheme, but also try to parse as authority */ parser->cur = parser->begin; if (!http_url_parse_authority_form(url_parser)) { url_parser->url = NULL; /* indicate non-http-url */ url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE; } return TRUE; } parser->error = "Not an HTTP URL"; return FALSE; } relative = FALSE; have_scheme = TRUE; } } else { relative = FALSE; have_scheme = TRUE; } /* "//" authority ; or * ["//"] authority ; when parsing a request target */ if (parser->cur < parser->end && parser->cur[0] == '/') { if (parser->cur+1 < parser->end && parser->cur[1] == '/') { parser->cur += 2; relative = FALSE; have_authority = TRUE; } else { /* start of absolute-path */ } } else if (url_parser->request_target && !have_scheme) { if (!http_url_parse_authority_form(url_parser)) { /* not non-HTTP scheme and invalid as authority-form */ parser->error = "Request target is invalid"; return FALSE; } return TRUE; } if (have_scheme && !have_authority) { parser->error = "Absolute HTTP URL requires `//' after `http:'"; return FALSE; } if (have_authority) { if (!http_url_parse_authority(url_parser)) return FALSE; } /* path-abempty / path-absolute / path-noscheme / path-empty */ if ((ret = uri_parse_path(parser, &path_relative, &path)) < 0) return FALSE; /* Relative URLs are only valid when we have a base URL */ if (relative) { if (base == NULL) { parser->error = "Relative HTTP URL not allowed"; return FALSE; } else if (!have_authority && url != NULL) { url->host_name = p_strdup_empty(parser->pool, base->host_name); url->host_ip = base->host_ip; url->have_host_ip = base->have_host_ip; url->port = 
base->port; url->have_port = base->have_port; url->have_ssl = base->have_ssl; url->user = p_strdup_empty(parser->pool, base->user); url->password = p_strdup_empty(parser->pool, base->password); } url_parser->relative = TRUE; } /* Resolve path */ if (ret > 0) { string_t *fullpath = NULL; have_path = TRUE; if (url != NULL) fullpath = t_str_new(256); if (relative && path_relative > 0 && base->path != NULL) { const char *pbegin = base->path; const char *pend = base->path + strlen(base->path); const char *p = pend - 1; i_assert(*pbegin == '/'); /* discard trailing segments of base path based on how many effective leading '..' segments were found in the relative path. */ while (path_relative > 0 && p > pbegin) { while (p > pbegin && *p != '/') p--; if (p >= pbegin) { pend = p; path_relative--; } if (p > pbegin) p--; } if (url != NULL && pend > pbegin) str_append_n(fullpath, pbegin, pend-pbegin); } /* append relative path */ while (*path != NULL) { if (!uri_data_decode(parser, *path, NULL, &part)) return FALSE; if (url != NULL) { str_append_c(fullpath, '/'); str_append(fullpath, part); } path++; } if (url != NULL) url->path = p_strdup(parser->pool, str_c(fullpath)); } else if (relative && url != NULL) { url->path = p_strdup(parser->pool, base->path); } /* [ "?" 
query ] */ if ((ret = uri_parse_query(parser, &part)) < 0) return FALSE; if (ret > 0) { if (!uri_data_decode(parser, part, NULL, NULL)) // check only return FALSE; if (url != NULL) url->enc_query = p_strdup(parser->pool, part); } else if (relative && !have_path && url != NULL) { url->enc_query = p_strdup(parser->pool, base->enc_query); } /* [ "#" fragment ] */ if ((ret = uri_parse_fragment(parser, &part)) < 0) return FALSE; if (ret > 0) { if ((url_parser->flags & HTTP_URL_ALLOW_FRAGMENT_PART) == 0) { parser->error = "URL fragment not allowed for HTTP URL in this context"; return FALSE; } if (!uri_data_decode(parser, part, NULL, NULL)) // check only return FALSE; if (url != NULL) url->enc_fragment = p_strdup(parser->pool, part); } else if (relative && !have_path && url != NULL) { url->enc_fragment = p_strdup(parser->pool, base->enc_fragment); } if (parser->cur != parser->end) { parser->error = "HTTP URL contains invalid character"; return FALSE; } if (have_scheme) url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE; return TRUE; }