Esempio n. 1
0
/**
 * soup_uri_normalize:
 * @part: a URI part
 * @unescape_extra: (allow-none): reserved characters to unescape (or %NULL)
 *
 * %<!-- -->-decodes any "unreserved" characters (or characters in
 * @unescape_extra) in @part, and %<!-- -->-encodes any non-ASCII
 * characters, spaces, and non-printing characters in @part.
 *
 * "Unreserved" characters are those that are not allowed to be used
 * for punctuation according to the URI spec. For example, letters are
 * unreserved, so soup_uri_normalize() will turn
 * <literal>http://example.com/foo/b%<!-- -->61r</literal> into
 * <literal>http://example.com/foo/bar</literal>, which is guaranteed
 * to mean the same thing. However, "/" is "reserved", so
 * <literal>http://example.com/foo%<!-- -->2Fbar</literal> would not
 * be changed, because it might mean something different to the
 * server.
 *
 * In the past, this would return %NULL if @part contained invalid
 * percent-encoding, but now it just ignores the problem (as
 * soup_uri_new() already did).
 *
 * Return value: the normalized URI part
 */
char *
soup_uri_normalize (const char *part, const char *unescape_extra)
{
    g_return_val_if_fail (part != NULL, NULL);

    return uri_normalized_copy (part, strlen (part), unescape_extra);
}
Esempio n. 2
0
/**
 * soup_uri_normalize:
 * @part: a URI part
 * @unescape_extra: reserved characters to unescape (or %NULL)
 *
 * %<!-- -->-decodes any "unreserved" characters (or characters in
 * @unescape_extra) in @part.
 *
 * "Unreserved" characters are those that are not allowed to be used
 * for punctuation according to the URI spec. For example, letters are
 * unreserved, so soup_uri_normalize() will turn
 * <literal>http://example.com/foo/b%<!-- -->61r</literal> into
 * <literal>http://example.com/foo/bar</literal>, which is guaranteed
 * to mean the same thing. However, "/" is "reserved", so
 * <literal>http://example.com/foo%<!-- -->2Fbar</literal> would not
 * be changed, because it might mean something different to the
 * server.
 *
 * In the past, this would return %NULL if @part contained invalid
 * percent-encoding, but now it just ignores the problem (as
 * soup_uri_new() already did).
 *
 * Return value: the normalized URI part
 */
char *
soup_uri_normalize (const char *part, const char *unescape_extra)
{
	return uri_normalized_copy (part, strlen (part), unescape_extra);
}
Esempio n. 3
0
/**
 * soup_uri_new_with_base:
 * @base: a base URI
 * @uri_string: the URI
 *
 * Parses @uri_string relative to @base.
 *
 * Return value: a parsed #SoupURI.
 **/
SoupURI *
soup_uri_new_with_base (SoupURI *base, const char *uri_string)
{
	SoupURI *uri;
	const char *end, *hash, *colon, *at, *path, *question;
	const char *p, *hostend;
	gboolean remove_dot_segments = TRUE;
	int len;

	/* First some cleanup steps (which are supposed to all be no-ops,
	 * but...). Skip initial whitespace, strip out internal tabs and
	 * line breaks, and ignore trailing whitespace.
	 */
	while (g_ascii_isspace (*uri_string))
		uri_string++;

	len = strcspn (uri_string, "\t\n\r");
	if (uri_string[len]) {
		char *clean = g_malloc (strlen (uri_string) + 1), *d;
		const char *s;

		for (s = uri_string, d = clean; *s; s++) {
			if (*s != '\t' && *s != '\n' && *s != '\r')
				*d++ = *s;
		}
		*d = '\0';

		uri = soup_uri_new_with_base (base, clean);
		g_free (clean);
		return uri;
	}
	end = uri_string + len;
	while (end > uri_string && g_ascii_isspace (end[-1]))
		end--;

	uri = g_slice_new0 (SoupURI);

	/* Find fragment. */
	hash = strchr (uri_string, '#');
	if (hash) {
		uri->fragment = uri_normalized_copy (hash + 1, end - hash + 1,
						     NULL);
		end = hash;
	}

	/* Find scheme: initial [a-z+.-]* substring until ":" */
	p = uri_string;
	while (p < end && (g_ascii_isalnum (*p) ||
			   *p == '.' || *p == '+' || *p == '-'))
		p++;

	if (p > uri_string && *p == ':') {
		uri->scheme = soup_uri_parse_scheme (uri_string, p - uri_string);
		uri_string = p + 1;
	}

	if (uri_string == end && !base && !uri->fragment)
		return uri;

	/* Check for authority */
	if (strncmp (uri_string, "//", 2) == 0) {
		uri_string += 2;

		path = uri_string + strcspn (uri_string, "/?#");
		if (path > end)
			path = end;
		at = strchr (uri_string, '@');
		if (at && at < path) {
			colon = strchr (uri_string, ':');
			if (colon && colon < at) {
				uri->password = uri_decoded_copy (colon + 1,
								  at - colon - 1);
			} else {
				uri->password = NULL;
				colon = at;
			}

			uri->user = uri_decoded_copy (uri_string,
						      colon - uri_string);
			uri_string = at + 1;
		} else
			uri->user = uri->password = NULL;

		/* Find host and port. */
		if (*uri_string == '[') {
			uri_string++;
			hostend = strchr (uri_string, ']');
			if (!hostend || hostend > path) {
				soup_uri_free (uri);
				return NULL;
			}
			if (*(hostend + 1) == ':')
				colon = hostend + 1;
			else
				colon = NULL;
		} else {
			colon = memchr (uri_string, ':', path - uri_string);
			hostend = colon ? colon : path;
		}

		uri->host = uri_decoded_copy (uri_string, hostend - uri_string);

		if (colon && colon != path - 1) {
			char *portend;
			uri->port = strtoul (colon + 1, &portend, 10);
			if (portend != (char *)path) {
				soup_uri_free (uri);
				return NULL;
			}
		}

		uri_string = path;
	}

	/* Find query */
	question = memchr (uri_string, '?', end - uri_string);
	if (question) {
		uri->query = uri_normalized_copy (question + 1,
						  end - (question + 1),
						  NULL);
		end = question;
	}

	if (end != uri_string) {
		uri->path = uri_normalized_copy (uri_string, end - uri_string,
						 NULL);
	}

	/* Apply base URI. This is spelled out in RFC 3986. */
	if (base && !uri->scheme && uri->host)
		uri->scheme = base->scheme;
	else if (base && !uri->scheme) {
		uri->scheme = base->scheme;
		uri->user = g_strdup (base->user);
		uri->password = g_strdup (base->password);
		uri->host = g_strdup (base->host);
		uri->port = base->port;

		if (!uri->path) {
			uri->path = g_strdup (base->path);
			if (!uri->query)
				uri->query = g_strdup (base->query);
			remove_dot_segments = FALSE;
		} else if (*uri->path != '/') {
			char *newpath, *last;

			last = strrchr (base->path, '/');
			if (last) {
				newpath = g_strdup_printf ("%.*s%s",
							   (int)(last + 1 - base->path),
							   base->path,
							   uri->path);
			} else
				newpath = g_strdup_printf ("/%s", uri->path);

			g_free (uri->path);
			uri->path = newpath;
		}
	}

	if (remove_dot_segments && uri->path && *uri->path) {
		char *p, *q;

		/* Remove "./" where "." is a complete segment. */
		for (p = uri->path + 1; *p; ) {
			if (*(p - 1) == '/' &&
			    *p == '.' && *(p + 1) == '/')
				memmove (p, p + 2, strlen (p + 2) + 1);
			else
				p++;
		}
		/* Remove "." at end. */
		if (p > uri->path + 2 &&
		    *(p - 1) == '.' && *(p - 2) == '/')
			*(p - 1) = '\0';

		/* Remove "<segment>/../" where <segment> != ".." */
		for (p = uri->path + 1; *p; ) {
			if (!strncmp (p, "../", 3)) {
				p += 3;
				continue;
			}
			q = strchr (p + 1, '/');
			if (!q)
				break;
			if (strncmp (q, "/../", 4) != 0) {
				p = q + 1;
				continue;
			}
			memmove (p, q + 4, strlen (q + 4) + 1);
			p = uri->path + 1;
		}
		/* Remove "<segment>/.." at end where <segment> != ".." */
		q = strrchr (uri->path, '/');
		if (q && !strcmp (q, "/..")) {
			p = q - 1;
			while (p > uri->path && *p != '/')
				p--;
			if (strncmp (p, "/../", 4) != 0)
				*(p + 1) = 0;
		}

		/* Remove extraneous initial "/.."s */
		while (!strncmp (uri->path, "/../", 4))
			memmove (uri->path, uri->path + 3, strlen (uri->path) - 2);
		if (!strcmp (uri->path, "/.."))
			uri->path[1] = '\0';
	}

	/* HTTP-specific stuff */
	if (uri->scheme == SOUP_URI_SCHEME_HTTP ||
	    uri->scheme == SOUP_URI_SCHEME_HTTPS) {
		if (!uri->path)
			uri->path = g_strdup ("/");
		if (!SOUP_URI_VALID_FOR_HTTP (uri)) {
			soup_uri_free (uri);
			return NULL;
		}
	}

	if (uri->scheme == SOUP_URI_SCHEME_FTP) {
		if (!uri->host) {
			soup_uri_free (uri);
			return NULL;
		}
	}

	if (!uri->port)
		uri->port = soup_scheme_default_port (uri->scheme);
	if (!uri->path)
		uri->path = g_strdup ("");

	return uri;
}
Esempio n. 4
0
/**
 * soup_uri_new_with_base:
 * @base: a base URI
 * @uri_string: the URI
 *
 * Parses @uri_string relative to @base.
 *
 * Return value: a parsed #SoupURI.
 **/
SoupURI *
soup_uri_new_with_base (SoupURI *base, const char *uri_string)
{
	SoupURI *uri;
	const char *end, *hash, *colon, *at, *path, *question;
	const char *p, *hostend;
	gboolean remove_dot_segments = TRUE;

	uri = g_slice_new0 (SoupURI);

	/* See RFC 3986 for details. IF YOU CHANGE ANYTHING IN THIS
	 * FUNCTION, RUN tests/uri-parsing AFTERWARDS.
	 */

	/* Find fragment. */
	end = hash = strchr (uri_string, '#');
	if (hash && hash[1]) {
		uri->fragment = uri_normalized_copy (hash + 1, strlen (hash + 1),
						     NULL, FALSE);
		if (!uri->fragment) {
			soup_uri_free (uri);
			return NULL;
		}
	} else
		end = uri_string + strlen (uri_string);

	/* Find scheme: initial [a-z+.-]* substring until ":" */
	p = uri_string;
	while (p < end && (g_ascii_isalnum (*p) ||
			   *p == '.' || *p == '+' || *p == '-'))
		p++;

	if (p > uri_string && *p == ':') {
		uri->scheme = soup_uri_get_scheme (uri_string, p - uri_string);
		if (!uri->scheme) {
			soup_uri_free (uri);
			return NULL;
		}
		uri_string = p + 1;
	}

	if (!*uri_string && !base)
		return uri;

	/* Check for authority */
	if (strncmp (uri_string, "//", 2) == 0) {
		uri_string += 2;

		path = uri_string + strcspn (uri_string, "/?#");
		at = strchr (uri_string, '@');
		if (at && at < path) {
			colon = strchr (uri_string, ':');
			if (colon && colon < at) {
				uri->password = uri_decoded_copy (colon + 1,
								  at - colon - 1);
				if (!uri->password) {
					soup_uri_free (uri);
					return NULL;
				}
			} else {
				uri->password = NULL;
				colon = at;
			}

			uri->user = uri_decoded_copy (uri_string,
						      colon - uri_string);
			if (!uri->user) {
				soup_uri_free (uri);
				return NULL;
			}
			uri_string = at + 1;
		} else
			uri->user = uri->password = NULL;

		/* Find host and port. */
		if (*uri_string == '[') {
			uri_string++;
			hostend = strchr (uri_string, ']');
			if (!hostend || hostend > path) {
				soup_uri_free (uri);
				return NULL;
			}
			if (*(hostend + 1) == ':')
				colon = hostend + 1;
			else
				colon = NULL;
		} else {
			colon = memchr (uri_string, ':', path - uri_string);
			hostend = colon ? colon : path;
		}

		uri->host = uri_decoded_copy (uri_string, hostend - uri_string);
		if (!uri->host) {
			soup_uri_free (uri);
			return NULL;
		}

		if (colon && colon != path - 1) {
			char *portend;
			uri->port = strtoul (colon + 1, &portend, 10);
			if (portend != (char *)path) {
				soup_uri_free (uri);
				return NULL;
			}
		}

		uri_string = path;
	}

	/* Find query */
	question = memchr (uri_string, '?', end - uri_string);
	if (question) {
		if (question[1]) {
			uri->query = uri_normalized_copy (question + 1,
							  end - (question + 1),
							  NULL, TRUE);
			if (!uri->query) {
				soup_uri_free (uri);
				return NULL;
			}
		}
		end = question;
	}

	if (end != uri_string) {
		uri->path = uri_normalized_copy (uri_string, end - uri_string,
						 NULL, TRUE);
		if (!uri->path) {
			soup_uri_free (uri);
			return NULL;
		}
	}

	/* Apply base URI. Again, this is spelled out in RFC 3986. */
	if (base && !uri->scheme && uri->host)
		uri->scheme = base->scheme;
	else if (base && !uri->scheme) {
		uri->scheme = base->scheme;
		uri->user = g_strdup (base->user);
		uri->password = g_strdup (base->password);
		uri->host = g_strdup (base->host);
		uri->port = base->port;

		if (!uri->path) {
			uri->path = g_strdup (base->path);
			if (!uri->query)
				uri->query = g_strdup (base->query);
			remove_dot_segments = FALSE;
		} else if (*uri->path != '/') {
			char *newpath, *last;

			last = strrchr (base->path, '/');
			if (last) {
				newpath = g_strdup_printf ("%.*s/%s",
							   (int)(last - base->path),
							   base->path,
							   uri->path);
			} else
				newpath = g_strdup_printf ("/%s", uri->path);

			g_free (uri->path);
			uri->path = newpath;
		}
	}

	if (remove_dot_segments && uri->path && *uri->path) {
		char *p = uri->path, *q;

		/* Remove "./" where "." is a complete segment. */
		for (p = uri->path + 1; *p; ) {
			if (*(p - 1) == '/' &&
			    *p == '.' && *(p + 1) == '/')
				memmove (p, p + 2, strlen (p + 2) + 1);
			else
				p++;
		}
		/* Remove "." at end. */
		if (p > uri->path + 2 &&
		    *(p - 1) == '.' && *(p - 2) == '/')
			*(p - 1) = '\0';

		/* Remove "<segment>/../" where <segment> != ".." */
		for (p = uri->path + 1; *p; ) {
			if (!strncmp (p, "../", 3)) {
				p += 3;
				continue;
			}
			q = strchr (p + 1, '/');
			if (!q)
				break;
			if (strncmp (q, "/../", 4) != 0) {
				p = q + 1;
				continue;
			}
			memmove (p, q + 4, strlen (q + 4) + 1);
			p = uri->path + 1;
		}
		/* Remove "<segment>/.." at end where <segment> != ".." */
		q = strrchr (uri->path, '/');
		if (q && !strcmp (q, "/..")) {
			p = q - 1;
			while (p > uri->path && *p != '/')
				p--;
			if (strncmp (p, "/../", 4) != 0)
				*(p + 1) = 0;
		}

		/* Remove extraneous initial "/.."s */
		while (!strncmp (uri->path, "/../", 4))
			memmove (uri->path, uri->path + 3, strlen (uri->path) - 2);
		if (!strcmp (uri->path, "/.."))
			uri->path[1] = '\0';
	}

	/* HTTP-specific stuff */
	if (uri->scheme == SOUP_URI_SCHEME_HTTP ||
	    uri->scheme == SOUP_URI_SCHEME_HTTPS) {
		if (!SOUP_URI_VALID_FOR_HTTP (uri)) {
			soup_uri_free (uri);
			return NULL;
		}
		if (!uri->path)
			uri->path = g_strdup ("/");
	}

	if (!uri->port)
		uri->port = soup_scheme_default_port (uri->scheme);
	if (!uri->path)
		uri->path = g_strdup ("");

	return uri;
}