/**
 * soup_uri_normalize:
 * @part: a URI part
 * @unescape_extra: (allow-none): reserved characters to unescape (or %NULL)
 *
 * Produces a normalized copy of @part: any "unreserved" characters
 * (and any characters listed in @unescape_extra) that appear
 * %<!-- -->-encoded are decoded, while non-ASCII characters, spaces,
 * and non-printing characters are %<!-- -->-encoded.
 *
 * "Unreserved" characters are the ones the URI spec does not allow to
 * be used as punctuation. Letters are unreserved, so
 * soup_uri_normalize() turns
 * <literal>http://example.com/foo/b%<!-- -->61r</literal> into
 * <literal>http://example.com/foo/bar</literal>, which is guaranteed
 * to mean the same thing. "/" on the other hand is "reserved", so
 * <literal>http://example.com/foo%<!-- -->2Fbar</literal> is left
 * alone, since decoding it might change its meaning to the server.
 *
 * Earlier versions returned %NULL when @part contained invalid
 * percent-encoding; the problem is now simply ignored (as
 * soup_uri_new() already did).
 *
 * Return value: the normalized URI part
 */
char *
soup_uri_normalize (const char *part, const char *unescape_extra)
{
	size_t part_len;

	g_return_val_if_fail (part != NULL, NULL);

	part_len = strlen (part);
	return uri_normalized_copy (part, part_len, unescape_extra);
}
/**
 * soup_uri_normalize:
 * @part: a URI part
 * @unescape_extra: (allow-none): reserved characters to unescape (or %NULL)
 *
 * %<!-- -->-decodes any "unreserved" characters (or characters in
 * @unescape_extra) in @part.
 *
 * "Unreserved" characters are those that are not allowed to be used
 * for punctuation according to the URI spec. For example, letters are
 * unreserved, so soup_uri_normalize() will turn
 * <literal>http://example.com/foo/b%<!-- -->61r</literal> into
 * <literal>http://example.com/foo/bar</literal>, which is guaranteed
 * to mean the same thing. However, "/" is "reserved", so
 * <literal>http://example.com/foo%<!-- -->2Fbar</literal> would not
 * be changed, because it might mean something different to the
 * server.
 *
 * In the past, this would return %NULL if @part contained invalid
 * percent-encoding, but now it just ignores the problem (as
 * soup_uri_new() already did).
 *
 * Passing %NULL for @part is a programming error; it is rejected by a
 * precondition check and %NULL is returned.
 *
 * Return value: the normalized URI part
 */
char *
soup_uri_normalize (const char *part, const char *unescape_extra)
{
	/* strlen() on a NULL pointer is undefined behaviour, so reject
	 * it up front instead of crashing inside libc.
	 */
	g_return_val_if_fail (part != NULL, NULL);

	return uri_normalized_copy (part, strlen (part), unescape_extra);
}
/**
 * soup_uri_new_with_base:
 * @base: a base URI
 * @uri_string: the URI
 *
 * Parses @uri_string relative to @base.
 *
 * Leading and trailing whitespace in @uri_string is ignored, and
 * embedded tabs and line breaks are stripped before parsing. If the
 * string carries no scheme and @base is given, the missing components
 * are taken from @base and dot segments ("." and "..") in the
 * resulting path are removed.
 *
 * %NULL is returned when the authority contains an unterminated or
 * misplaced "[...]" host, when the port is followed by anything other
 * than the start of the path, when an http/https URI fails
 * SOUP_URI_VALID_FOR_HTTP(), or when an ftp URI has no host.
 *
 * Return value: a parsed #SoupURI.
 **/
SoupURI *
soup_uri_new_with_base (SoupURI *base, const char *uri_string)
{
	SoupURI *uri;
	const char *end, *hash, *colon, *at, *path, *question;
	const char *p, *hostend;
	gboolean remove_dot_segments = TRUE;
	size_t len;

	g_return_val_if_fail (uri_string != NULL, NULL);

	/* First some cleanup steps (which are supposed to all be no-ops,
	 * but...). Skip initial whitespace, strip out internal tabs and
	 * line breaks, and ignore trailing whitespace.
	 */
	while (g_ascii_isspace (*uri_string))
		uri_string++;

	len = strcspn (uri_string, "\t\n\r");
	if (uri_string[len]) {
		/* There is at least one tab/CR/LF inside the string: build
		 * a cleaned copy and parse that instead.
		 */
		char *clean = g_malloc (strlen (uri_string) + 1), *d;
		const char *s;

		for (s = uri_string, d = clean; *s; s++) {
			if (*s != '\t' && *s != '\n' && *s != '\r')
				*d++ = *s;
		}
		*d = '\0';

		uri = soup_uri_new_with_base (base, clean);
		g_free (clean);
		return uri;
	}
	end = uri_string + len;
	while (end > uri_string && g_ascii_isspace (end[-1]))
		end--;

	uri = g_slice_new0 (SoupURI);

	/* Find fragment. It runs from the character after '#' up to
	 * (but not including) @end, i.e. end - (hash + 1) bytes.
	 */
	hash = strchr (uri_string, '#');
	if (hash) {
		uri->fragment = uri_normalized_copy (hash + 1,
						     end - (hash + 1),
						     NULL);
		end = hash;
	}

	/* Find scheme: initial [a-z+.-]* substring until ":" */
	p = uri_string;
	while (p < end && (g_ascii_isalnum (*p) ||
			   *p == '.' || *p == '+' || *p == '-'))
		p++;

	if (p > uri_string && *p == ':') {
		uri->scheme = soup_uri_parse_scheme (uri_string, p - uri_string);
		uri_string = p + 1;
	}

	/* Nothing left to parse and nothing to inherit. */
	if (uri_string == end && !base && !uri->fragment)
		return uri;

	/* Check for authority */
	if (strncmp (uri_string, "//", 2) == 0) {
		uri_string += 2;

		path = uri_string + strcspn (uri_string, "/?#");
		if (path > end)
			path = end;
		at = strchr (uri_string, '@');
		if (at && at < path) {
			colon = strchr (uri_string, ':');
			if (colon && colon < at) {
				uri->password = uri_decoded_copy (colon + 1,
								  at - colon - 1);
			} else {
				uri->password = NULL;
				colon = at;
			}

			uri->user = uri_decoded_copy (uri_string,
						      colon - uri_string);
			uri_string = at + 1;
		} else
			uri->user = uri->password = NULL;

		/* Find host and port. */
		if (*uri_string == '[') {
			/* Bracketed host: everything up to the matching ']'. */
			uri_string++;
			hostend = strchr (uri_string, ']');
			if (!hostend || hostend > path) {
				soup_uri_free (uri);
				return NULL;
			}
			if (*(hostend + 1) == ':')
				colon = hostend + 1;
			else
				colon = NULL;
		} else {
			colon = memchr (uri_string, ':', path - uri_string);
			hostend = colon ? colon : path;
		}

		uri->host = uri_decoded_copy (uri_string, hostend - uri_string);

		/* A ':' immediately before the path means "no port". */
		if (colon && colon != path - 1) {
			char *portend;

			uri->port = strtoul (colon + 1, &portend, 10);
			if (portend != (char *)path) {
				soup_uri_free (uri);
				return NULL;
			}
		}

		uri_string = path;
	}

	/* Find query */
	question = memchr (uri_string, '?', end - uri_string);
	if (question) {
		uri->query = uri_normalized_copy (question + 1,
						  end - (question + 1),
						  NULL);
		end = question;
	}

	if (end != uri_string) {
		uri->path = uri_normalized_copy (uri_string, end - uri_string,
						 NULL);
	}

	/* Apply base URI. This is spelled out in RFC 3986. */
	if (base && !uri->scheme && uri->host)
		uri->scheme = base->scheme;
	else if (base && !uri->scheme) {
		uri->scheme = base->scheme;
		uri->user = g_strdup (base->user);
		uri->password = g_strdup (base->password);
		uri->host = g_strdup (base->host);
		uri->port = base->port;

		if (!uri->path) {
			/* Empty reference path: reuse the base path as is. */
			uri->path = g_strdup (base->path);
			if (!uri->query)
				uri->query = g_strdup (base->query);
			remove_dot_segments = FALSE;
		} else if (*uri->path != '/') {
			/* Relative path: merge with the base path's directory. */
			char *newpath, *last;

			last = strrchr (base->path, '/');
			if (last) {
				newpath = g_strdup_printf ("%.*s%s",
							   (int)(last + 1 - base->path),
							   base->path,
							   uri->path);
			} else
				newpath = g_strdup_printf ("/%s", uri->path);

			g_free (uri->path);
			uri->path = newpath;
		}
	}

	if (remove_dot_segments && uri->path && *uri->path) {
		char *seg, *q;

		/* Remove "./" where "." is a complete segment. */
		for (seg = uri->path + 1; *seg; ) {
			if (*(seg - 1) == '/' &&
			    *seg == '.' && *(seg + 1) == '/')
				memmove (seg, seg + 2, strlen (seg + 2) + 1);
			else
				seg++;
		}
		/* Remove "." at end. */
		if (seg > uri->path + 2 &&
		    *(seg - 1) == '.' && *(seg - 2) == '/')
			*(seg - 1) = '\0';

		/* Remove "<segment>/../" where <segment> != ".." */
		for (seg = uri->path + 1; *seg; ) {
			if (!strncmp (seg, "../", 3)) {
				seg += 3;
				continue;
			}
			q = strchr (seg + 1, '/');
			if (!q)
				break;
			if (strncmp (q, "/../", 4) != 0) {
				seg = q + 1;
				continue;
			}
			memmove (seg, q + 4, strlen (q + 4) + 1);
			seg = uri->path + 1;
		}
		/* Remove "<segment>/.." at end where <segment> != "..".
		 * When the final "/.." is the whole path there is no
		 * preceding segment, and stepping back from it would
		 * read before the start of the buffer; that case is
		 * handled by the "/.." check below instead.
		 */
		q = strrchr (uri->path, '/');
		if (q && q != uri->path && !strcmp (q, "/..")) {
			seg = q - 1;
			while (seg > uri->path && *seg != '/')
				seg--;
			if (strncmp (seg, "/../", 4) != 0)
				*(seg + 1) = 0;
		}

		/* Remove extraneous initial "/.."s */
		while (!strncmp (uri->path, "/../", 4))
			memmove (uri->path, uri->path + 3, strlen (uri->path) - 2);
		if (!strcmp (uri->path, "/.."))
			uri->path[1] = '\0';
	}

	/* HTTP-specific stuff */
	if (uri->scheme == SOUP_URI_SCHEME_HTTP ||
	    uri->scheme == SOUP_URI_SCHEME_HTTPS) {
		if (!uri->path)
			uri->path = g_strdup ("/");
		if (!SOUP_URI_VALID_FOR_HTTP (uri)) {
			soup_uri_free (uri);
			return NULL;
		}
	}

	if (uri->scheme == SOUP_URI_SCHEME_FTP) {
		if (!uri->host) {
			soup_uri_free (uri);
			return NULL;
		}
	}

	if (!uri->port)
		uri->port = soup_scheme_default_port (uri->scheme);
	if (!uri->path)
		uri->path = g_strdup ("");

	return uri;
}
/**
 * soup_uri_new_with_base:
 * @base: a base URI
 * @uri_string: the URI
 *
 * Parses @uri_string relative to @base.
 *
 * If the string carries no scheme and @base is given, the missing
 * components are taken from @base and dot segments ("." and "..") in
 * the resulting path are removed. A "#" or "?" that is followed by
 * nothing leaves the fragment or query unset.
 *
 * %NULL is returned when any component fails to decode/normalize,
 * when the scheme cannot be resolved, when the authority contains an
 * unterminated or misplaced "[...]" host, when the port is followed
 * by anything other than the start of the path, or when an http/https
 * URI fails SOUP_URI_VALID_FOR_HTTP().
 *
 * Return value: a parsed #SoupURI.
 **/
SoupURI *
soup_uri_new_with_base (SoupURI *base, const char *uri_string)
{
	SoupURI *uri;
	const char *end, *hash, *colon, *at, *path, *question;
	const char *p, *hostend;
	gboolean remove_dot_segments = TRUE;

	g_return_val_if_fail (uri_string != NULL, NULL);

	uri = g_slice_new0 (SoupURI);

	/* See RFC 3986 for details. IF YOU CHANGE ANYTHING IN THIS
	 * FUNCTION, RUN tests/uri-parsing AFTERWARDS.
	 */

	/* Find fragment. Whenever a '#' is present the rest of the URI
	 * ends there, even if nothing follows it; otherwise a trailing
	 * "#" would be swallowed into the path or query.
	 */
	hash = strchr (uri_string, '#');
	if (hash) {
		end = hash;
		if (hash[1]) {
			uri->fragment = uri_normalized_copy (hash + 1,
							     strlen (hash + 1),
							     NULL, FALSE);
			if (!uri->fragment) {
				soup_uri_free (uri);
				return NULL;
			}
		}
	} else
		end = uri_string + strlen (uri_string);

	/* Find scheme: initial [a-z+.-]* substring until ":" */
	p = uri_string;
	while (p < end && (g_ascii_isalnum (*p) ||
			   *p == '.' || *p == '+' || *p == '-'))
		p++;

	if (p > uri_string && *p == ':') {
		uri->scheme = soup_uri_get_scheme (uri_string, p - uri_string);
		if (!uri->scheme) {
			soup_uri_free (uri);
			return NULL;
		}
		uri_string = p + 1;
	}

	/* Nothing left to parse and nothing to inherit. */
	if (!*uri_string && !base)
		return uri;

	/* Check for authority */
	if (strncmp (uri_string, "//", 2) == 0) {
		uri_string += 2;

		path = uri_string + strcspn (uri_string, "/?#");
		at = strchr (uri_string, '@');
		if (at && at < path) {
			colon = strchr (uri_string, ':');
			if (colon && colon < at) {
				uri->password = uri_decoded_copy (colon + 1,
								  at - colon - 1);
				if (!uri->password) {
					soup_uri_free (uri);
					return NULL;
				}
			} else {
				uri->password = NULL;
				colon = at;
			}

			uri->user = uri_decoded_copy (uri_string,
						      colon - uri_string);
			if (!uri->user) {
				soup_uri_free (uri);
				return NULL;
			}
			uri_string = at + 1;
		} else
			uri->user = uri->password = NULL;

		/* Find host and port. */
		if (*uri_string == '[') {
			/* Bracketed host: everything up to the matching ']'. */
			uri_string++;
			hostend = strchr (uri_string, ']');
			if (!hostend || hostend > path) {
				soup_uri_free (uri);
				return NULL;
			}
			if (*(hostend + 1) == ':')
				colon = hostend + 1;
			else
				colon = NULL;
		} else {
			colon = memchr (uri_string, ':', path - uri_string);
			hostend = colon ? colon : path;
		}

		uri->host = uri_decoded_copy (uri_string, hostend - uri_string);
		if (!uri->host) {
			soup_uri_free (uri);
			return NULL;
		}

		/* A ':' immediately before the path means "no port". */
		if (colon && colon != path - 1) {
			char *portend;

			uri->port = strtoul (colon + 1, &portend, 10);
			if (portend != (char *)path) {
				soup_uri_free (uri);
				return NULL;
			}
		}

		uri_string = path;
	}

	/* Find query */
	question = memchr (uri_string, '?', end - uri_string);
	if (question) {
		if (question[1]) {
			uri->query = uri_normalized_copy (question + 1,
							  end - (question + 1),
							  NULL, TRUE);
			if (!uri->query) {
				soup_uri_free (uri);
				return NULL;
			}
		}
		end = question;
	}

	if (end != uri_string) {
		uri->path = uri_normalized_copy (uri_string, end - uri_string,
						 NULL, TRUE);
		if (!uri->path) {
			soup_uri_free (uri);
			return NULL;
		}
	}

	/* Apply base URI. Again, this is spelled out in RFC 3986. */
	if (base && !uri->scheme && uri->host)
		uri->scheme = base->scheme;
	else if (base && !uri->scheme) {
		uri->scheme = base->scheme;
		uri->user = g_strdup (base->user);
		uri->password = g_strdup (base->password);
		uri->host = g_strdup (base->host);
		uri->port = base->port;

		if (!uri->path) {
			/* Empty reference path: reuse the base path as is. */
			uri->path = g_strdup (base->path);
			if (!uri->query)
				uri->query = g_strdup (base->query);
			remove_dot_segments = FALSE;
		} else if (*uri->path != '/') {
			/* Relative path: merge with the base path's directory. */
			char *newpath, *last;

			last = strrchr (base->path, '/');
			if (last) {
				newpath = g_strdup_printf ("%.*s/%s",
							   (int)(last - base->path),
							   base->path,
							   uri->path);
			} else
				newpath = g_strdup_printf ("/%s", uri->path);

			g_free (uri->path);
			uri->path = newpath;
		}
	}

	if (remove_dot_segments && uri->path && *uri->path) {
		char *seg, *q;

		/* Remove "./" where "." is a complete segment. */
		for (seg = uri->path + 1; *seg; ) {
			if (*(seg - 1) == '/' &&
			    *seg == '.' && *(seg + 1) == '/')
				memmove (seg, seg + 2, strlen (seg + 2) + 1);
			else
				seg++;
		}
		/* Remove "." at end. */
		if (seg > uri->path + 2 &&
		    *(seg - 1) == '.' && *(seg - 2) == '/')
			*(seg - 1) = '\0';

		/* Remove "<segment>/../" where <segment> != ".." */
		for (seg = uri->path + 1; *seg; ) {
			if (!strncmp (seg, "../", 3)) {
				seg += 3;
				continue;
			}
			q = strchr (seg + 1, '/');
			if (!q)
				break;
			if (strncmp (q, "/../", 4) != 0) {
				seg = q + 1;
				continue;
			}
			memmove (seg, q + 4, strlen (q + 4) + 1);
			seg = uri->path + 1;
		}
		/* Remove "<segment>/.." at end where <segment> != "..".
		 * When the final "/.." is the whole path there is no
		 * preceding segment, and stepping back from it would
		 * read before the start of the buffer; that case is
		 * handled by the "/.." check below instead.
		 */
		q = strrchr (uri->path, '/');
		if (q && q != uri->path && !strcmp (q, "/..")) {
			seg = q - 1;
			while (seg > uri->path && *seg != '/')
				seg--;
			if (strncmp (seg, "/../", 4) != 0)
				*(seg + 1) = 0;
		}

		/* Remove extraneous initial "/.."s */
		while (!strncmp (uri->path, "/../", 4))
			memmove (uri->path, uri->path + 3, strlen (uri->path) - 2);
		if (!strcmp (uri->path, "/.."))
			uri->path[1] = '\0';
	}

	/* HTTP-specific stuff */
	if (uri->scheme == SOUP_URI_SCHEME_HTTP ||
	    uri->scheme == SOUP_URI_SCHEME_HTTPS) {
		if (!SOUP_URI_VALID_FOR_HTTP (uri)) {
			soup_uri_free (uri);
			return NULL;
		}
		if (!uri->path)
			uri->path = g_strdup ("/");
	}

	if (!uri->port)
		uri->port = soup_scheme_default_port (uri->scheme);
	if (!uri->path)
		uri->path = g_strdup ("");

	return uri;
}