Beispiel #1
0
/*
 * Split the URI text in `urib` into its components and store a copy of each
 * one (made with xstrndup) in `uri`.
 *
 * `uri` is zeroed before anything else happens, so optional members that are
 * not present in the input stay zero.  The text must have the shape
 * scheme "//" [userinfo] host/port [path] [query] [fragment]; the exact
 * syntax of each piece is decided by the uri_parse_* / uria_parse_* helpers.
 *
 * Returns 0 on success.  Returns -1 when the scheme helper fails, when the
 * scheme is not followed by "//", or when the host/port helper fails; in
 * those cases `uri` is left zeroed and nothing has been copied.
 */
int uri_parse(struct uri *uri, const char *urib)
{
    /* Start of each component inside urib (nothing is copied yet). */
    const char *scheme_start;
    const char *after_scheme;
    const char *userinfo_start;
    const char *host_start;
    const char *path_start;
    const char *query_start;
    const char *frag_start;

    /* Length of each component, filled in by the helpers. */
    size_t scheme_len;
    size_t userinfo_len;
    size_t host_len;
    size_t path_len;
    size_t query_len;
    size_t frag_len;
    int port;

    /* Non-zero when the corresponding optional component was reported. */
    int has_userinfo;
    int has_path;
    int has_query;
    int has_fragment;

    assert(uri);
    assert(urib);

    memset(uri, 0, sizeof(*uri));

    /* Pass 1: locate every component without allocating anything. */

    scheme_start = urib;
    if (uri_parse_scheme(scheme_start, &scheme_len, &after_scheme) == -1)
        return -1;

    /* The authority must be introduced by exactly "//". */
    if (after_scheme[0] != '/' || after_scheme[1] != '/')
        return -1;

    userinfo_start = after_scheme + 2;
    has_userinfo = uria_parse_userinfo(userinfo_start, &userinfo_len,
                                       &host_start);

    /* host_start is both the input position and an output of this call. */
    if (uria_parse_host_port(host_start, &host_start, &host_len, &port,
                             &path_start) == -1)
        return -1;

    has_path = uri_parse_path(path_start, &path_len, &query_start);
    has_query = uri_parse_query(&query_start, &query_len, &frag_start);
    has_fragment = uri_parse_fragment(&frag_start, &frag_len);

    /* Pass 2: every failure exit is behind us, so copy the pieces out in
     * one place. */
    uri->scheme = xstrndup(scheme_start, scheme_len);
    uri->authority.port = port;
    uri->authority.hostname = xstrndup(host_start, host_len);

    if (has_userinfo != 0)
        uri->authority.opt = xstrndup(userinfo_start, userinfo_len);

    if (has_path != 0)
        uri->path = xstrndup(path_start, path_len);

    if (has_query != 0)
        uri->query = xstrndup(query_start, query_len);

    if (has_fragment != 0)
        uri->fragment = xstrndup(frag_start, frag_len);

    return 0;
}
Beispiel #2
0
/*
 * Parse the URI text in `str` into `uri`.
 *
 * The function works on a private, writable copy of `str`; `str` itself is
 * never modified.  `uri_destroy(uri)` is called before parsing starts.
 * A scheme is only looked for when the input does not begin with '/'.
 *
 * Returns:
 *    0         the whole input was consumed
 *   -EINVAL    `uri` or `str` is NULL
 *   -ENOMEM    the working copy could not be allocated
 *   -EBADMSG   the parse cursor is still non-NULL after the last component
 *              (presumably: unparsed input remains)
 *   other      any non-zero value returned by one of the uri_parse_*
 *              sub-parsers is passed through unchanged
 */
int uri_parse(uri_t *uri, const char *str)
{
    char *s = NULL; /* writable working copy of str */
    char *q = NULL; /* parse cursor into s, updated by the sub-parsers */
    int ret = 0;

    if ((uri == NULL) || (str == NULL))
    {
        return -EINVAL;
    }

    uri_destroy(uri);

    s = strdup(str);
    if (s == NULL)
    {
        return -ENOMEM;
    }
    q = s;

    if (*s != '/')
    {
        /* read scheme */
        ret = uri_parse_scheme(uri, &q);
        if (ret != 0)
        {
            goto out;
        }
    }

    /* read hier-part */
    ret = uri_parse_hier_part(uri, &q);
    if (ret != 0)
    {
        goto out;
    }

    /*
     * The delimiter just before q has been overwritten in the working copy,
     * so look it up at the same offset in the untouched input string.
     * The q > s guard keeps that look-behind inside the string should a
     * sub-parser ever leave the cursor at the very start.
     */
    if ((q != NULL) && (q > s) && (str[(q - 1) - s] == '?'))
    {
        ret = uri_parse_query(uri, &q);
        if (ret != 0)
        {
            goto out;
        }
    }

    if ((q != NULL) && (q > s) && (str[(q - 1) - s] == '#'))
    {
        ret = uri_parse_fragment(uri, &q);
        if (ret != 0)
        {
            goto out;
        }
    }

    /*
     * Decide the result while s is still alive: q points into s, and the
     * value of a pointer into freed storage must not be inspected.
     */
    if (q != NULL)
    {
        ret = -EBADMSG;
    }

out:
    free(s);
    return ret;
}
Beispiel #3
0
/* Parse an HTTP URL - or, when url_parser->request_target is set, an HTTP
   request target - from the input held by url_parser->parser.

   Results are written to url_parser->url. That pointer may be NULL, in which
   case every store into the URL is skipped and the input is only validated.
   url_parser->base, when not NULL, supplies the authority, path, query and
   fragment that a relative URL inherits; a relative URL without a base is
   rejected.

   Flags read from url_parser->flags:
     HTTP_URL_PARSE_SCHEME_EXTERNAL - do not parse a scheme here; behave as
                                      if one had already been seen.
     HTTP_URL_ALLOW_FRAGMENT_PART   - accept a "#fragment"; without it a
                                      fragment is an error.

   Side effects on url_parser besides the URL itself: url_parser->relative is
   set for relative URLs, url_parser->req_format is set to
   HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE when a scheme was present, and
   url_parser->url is reset to NULL for a request target with a non-HTTP
   scheme that is not valid as authority-form.

   Returns TRUE on success and FALSE on failure. For the failures detected
   directly in this function parser->error is set to a message; failures
   reported by the uri_parse_*() / http_url_parse_*() helpers return FALSE
   without touching parser->error here (presumably the helper sets it).
 */
static bool http_url_do_parse(struct http_url_parser *url_parser)
{
	struct uri_parser *parser = &url_parser->parser;
	struct http_url *url = url_parser->url, *base = url_parser->base;
	const char *const *path;
	bool relative = TRUE, have_scheme = FALSE, have_authority = FALSE,
		have_path = FALSE;
	int path_relative;
	const char *part;
	int ret;

	/* RFC 7230, Appendix B:

	   http-URI       = "http://" authority path-abempty [ "?" query ]
	                    [ "#" fragment ]
	   https-URI      = "https://" authority path-abempty [ "?" query ]
	                    [ "#" fragment ]
	   partial-URI    = relative-part [ "?" query ]

	   request-target = origin-form / absolute-form / authority-form /
	                    asterisk-form

	   origin-form    = absolute-path [ "?" query ]
	   absolute-form  = absolute-URI
	   authority-form = authority
	   asterisk-form  = "*"
	                  ; Not parsed here

	   absolute-path  = 1*( "/" segment )

	   RFC 3986, Appendix A: (implemented in uri-util.h)

	   absolute-URI   = scheme ":" hier-part [ "?" query ]

	   hier-part      = "//" authority path-abempty
	                  / path-absolute
	                  / path-rootless
	                  / path-empty

	   relative-part  = "//" authority path-abempty
	                  / path-absolute
	                  / path-noscheme
	                  / path-empty

	   authority     = [ userinfo "@" ] host [ ":" port ]

	   path-abempty   = *( "/" segment )
	   path-absolute  = "/" [ segment-nz *( "/" segment ) ]
	   path-noscheme  = segment-nz-nc *( "/" segment )
	   path-rootless  = segment-nz *( "/" segment )
	   path-empty     = 0<pchar>

	   segment        = *pchar
	   segment-nz     = 1*pchar
	   segment-nz-nc  = 1*( unreserved / pct-encoded / sub-delims / "@" )
                    ; non-zero-length segment without any colon ":"

	   query          = *( pchar / "/" / "?" )
	   fragment       = *( pchar / "/" / "?" )
	 */

	/* "http:" / "https:" */
	if ((url_parser->flags & HTTP_URL_PARSE_SCHEME_EXTERNAL) == 0) {
		const char *scheme;

		/* ret < 0: error; ret > 0: a scheme was parsed; ret == 0 falls
		   through with relative still TRUE (presumably: no scheme found) */
		if ((ret = uri_parse_scheme(parser, &scheme)) < 0)
			return FALSE;
		else if (ret > 0) {
			if (strcasecmp(scheme, "https") == 0) {
				if (url != NULL)
					url->have_ssl = TRUE;
			} else if (strcasecmp(scheme, "http") != 0) {
				if (url_parser->request_target) {
					/* valid as non-HTTP scheme, but also try to parse as authority */
					/* rewind to the start of the input: what was taken for
					   "scheme:" may equally be the "host:" of authority-form */
					parser->cur = parser->begin;
					if (!http_url_parse_authority_form(url_parser)) {
						url_parser->url = NULL; /* indicate non-http-url */
						url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE;
					}
					/* accepted either way for a request target */
					return TRUE;
				}
				parser->error = "Not an HTTP URL";
				return FALSE;
			}
			/* reached for both "http" and "https" */
			relative = FALSE;
			have_scheme = TRUE;
		}
	} else {
		/* scheme was handled by the caller; treat the URL as absolute */
		relative = FALSE;
		have_scheme = TRUE;
	}

	/* "//" authority   ; or
	 * ["//"] authority ; when parsing a request target
	 */
	if (parser->cur < parser->end && parser->cur[0] == '/') {
		if (parser->cur+1 < parser->end && parser->cur[1] == '/') {
			/* skip the "//"; the authority itself is parsed further below */
			parser->cur += 2;
			relative = FALSE;
			have_authority = TRUE;
		} else {
			/* start of absolute-path */
		}
	} else if (url_parser->request_target && !have_scheme) {
		/* no leading '/' and no scheme: the only request-target form left
		   to try is authority-form */
		if (!http_url_parse_authority_form(url_parser)) {
			/* not non-HTTP scheme and invalid as authority-form */
			parser->error = "Request target is invalid";
			return FALSE;
		}
		return TRUE;
	}

	if (have_scheme && !have_authority) {
		parser->error = "Absolute HTTP URL requires `//' after `http:'";
 		return FALSE;
	}

	if (have_authority) {
		if (!http_url_parse_authority(url_parser))
			return FALSE;
	}

	/* path-abempty / path-absolute / path-noscheme / path-empty */
	/* ret is kept until the "Resolve path" step below: ret > 0 means a path
	   was parsed. path receives a NULL-terminated array of segments and
	   path_relative is used below as the number of base path segments to
	   discard. */
	if ((ret = uri_parse_path(parser, &path_relative, &path)) < 0)
		return FALSE;

	/* Relative URLs are only valid when we have a base URL */
	if (relative) {
		if (base == NULL) {
			parser->error = "Relative HTTP URL not allowed";
			return FALSE;
		} else if (!have_authority && url != NULL) {
			/* no authority in the input: inherit all of it from the base */
			url->host_name = p_strdup_empty(parser->pool, base->host_name); 
			url->host_ip = base->host_ip;
			url->have_host_ip = base->have_host_ip;
			url->port = base->port;
			url->have_port = base->have_port;
			url->have_ssl = base->have_ssl;
			url->user = p_strdup_empty(parser->pool, base->user);
			url->password = p_strdup_empty(parser->pool, base->password);
		}

		url_parser->relative = TRUE;
	}

	/* Resolve path */
	if (ret > 0) {
		/* stays NULL in validate-only mode (url == NULL); every use below
		   is guarded by url != NULL */
		string_t *fullpath = NULL;

		have_path = TRUE;

		if (url != NULL)
			fullpath = t_str_new(256);

		/* base is known to be non-NULL here: relative with a NULL base
		   already returned FALSE above */
		if (relative && path_relative > 0 && base->path != NULL) {
			const char *pbegin = base->path;
			const char *pend = base->path + strlen(base->path);
			const char *p = pend - 1;

			i_assert(*pbegin == '/');

			/* discard trailing segments of base path based on how many effective
			   leading '..' segments were found in the relative path.
			 */
			while (path_relative > 0 && p > pbegin) {
				/* scan back to the '/' that starts the current last segment */
				while (p > pbegin && *p != '/') p--;
				/* NOTE(review): p never drops below pbegin in this loop, so
				   this condition always holds */
				if (p >= pbegin) {
					pend = p;
					path_relative--;
				}
				/* step over that '/' before looking for the next one */
				if (p > pbegin) p--;
			}

			/* keep whatever prefix of the base path is left */
			if (url != NULL && pend > pbegin)
				str_append_n(fullpath, pbegin, pend-pbegin);
		}

		/* append relative path */
		while (*path != NULL) {
			/* decoding runs even in validate-only mode so that invalid
			   segment data is still rejected */
			if (!uri_data_decode(parser, *path, NULL, &part))
				return FALSE;

			if (url != NULL) {
				str_append_c(fullpath, '/');
				str_append(fullpath, part);
			}
			path++;
		}

		if (url != NULL)
			url->path = p_strdup(parser->pool, str_c(fullpath));
	} else if (relative && url != NULL) {
		/* relative URL without a path of its own: reuse the base path */
		url->path = p_strdup(parser->pool, base->path);
	}

	/* [ "?" query ] */
	if ((ret = uri_parse_query(parser, &part)) < 0)
		return FALSE;
	if (ret > 0) {
		/* the query is stored still encoded; decoding is done only to
		   validate it */
		if (!uri_data_decode(parser, part, NULL, NULL)) // check only
			return FALSE;
		if (url != NULL)
			url->enc_query = p_strdup(parser->pool, part);
	} else if (relative && !have_path && url != NULL) {
		/* the base query is inherited only when the relative URL has
		   neither a path nor a query of its own */
		url->enc_query = p_strdup(parser->pool, base->enc_query);
	}

	/* [ "#" fragment ] */
	if ((ret = uri_parse_fragment(parser, &part)) < 0)
		return FALSE;
	if (ret > 0) {
		if ((url_parser->flags & HTTP_URL_ALLOW_FRAGMENT_PART) == 0) {
			parser->error = "URL fragment not allowed for HTTP URL in this context";
			return FALSE;
		}
		/* as with the query: validated by decoding, stored encoded */
		if (!uri_data_decode(parser, part, NULL, NULL)) // check only
			return FALSE;
		if (url != NULL)
			url->enc_fragment =  p_strdup(parser->pool, part);
	} else if (relative && !have_path && url != NULL) {
		/* same inheritance rule as for the query */
		url->enc_fragment = p_strdup(parser->pool, base->enc_fragment);
	}

	/* anything left unconsumed at this point cannot be part of the URL */
	if (parser->cur != parser->end) {
		parser->error = "HTTP URL contains invalid character";
		return FALSE;
	}

	if (have_scheme)
		url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE;
	return TRUE;
}