/*
 * Return non-zero if `c` ends the authority component of a URI: a slash, a
 * question mark, a number sign, or the end of the string (RFC 3986, 3.2).
 */
static int
http_uri_parse_is_authority_end(char c) {
    return c == '/' || c == '?' || c == '#' || c == '\0';
}

/*
 * Parse the URI string `str` and store its components in `uri`.
 *
 * `uri` is zeroed first, then filled with the scheme, user, password, host,
 * port, path, query parameters and fragment found in `str`. A string starting
 * with '/' is treated as a path-only reference: scheme and authority parsing
 * are skipped. When no path is present, the path defaults to "/".
 *
 * Returns 1 on success and -1 on failure. Syntax errors detected here are
 * reported through http_set_error(); failures of the helper functions are
 * propagated as -1 (presumably with the error already set by the helper).
 *
 * On failure, components already stored in `uri` are left in place; releasing
 * them is left to the caller (NOTE(review): confirm callers do so).
 */
static int
http_uri_parse(const char *str, struct http_uri *uri) {
    const char *ptr, *start, *colon, *at;
    size_t toklen;

    memset(uri, 0, sizeof(struct http_uri));

    ptr = str;

    if (*ptr == '\0') {
        http_set_error("empty string");
        return -1;
    }

    /* Scheme */
    start = ptr;

    if (*ptr == '/') {
        goto path;
    } else if (!(*ptr >= 'a' && *ptr <= 'z')
            && !(*ptr >= 'A' && *ptr <= 'Z')
            && *ptr != '%') {
        http_set_error("invalid first character \\%hhu in scheme",
                       (unsigned char)*ptr);
        return -1;
    }

    for (;;) {
        if (*ptr == ':' || *ptr == '\0') {
            toklen = (size_t)(ptr - start);
            uri->scheme = http_uri_decode_component(start, toklen);
            if (!uri->scheme)
                return -1;

            break;
        } else if (!http_uri_is_scheme_char((unsigned char)*ptr)
                && *ptr != '%') {
            http_set_error("invalid character \\%hhu in scheme",
                           (unsigned char)*ptr);
            return -1;
        }

        ptr++;
    }

    /* Skip '://' */
    if (ptr[0] != ':' || ptr[1] != '/' || ptr[2] != '/') {
        http_set_error("invalid characters after scheme");
        return -1;
    }

    ptr += 3;

    /*
     * User and password (optional).
     *
     * Look for '@' inside the authority only: an '@' located in the path,
     * query or fragment must not be mistaken for the userinfo delimiter.
     * The user/password separator is the FIRST colon of the userinfo, so
     * that a password may itself contain colons.
     */
    start = ptr;
    colon = NULL;
    at = NULL;

    while (!http_uri_parse_is_authority_end(*ptr)) {
        if (*ptr == ':') {
            if (!colon)
                colon = ptr;
        } else if (*ptr == '@') {
            at = ptr;
            break;
        }

        ptr++;
    }

    if (at) {
        if (colon) {
            toklen = (size_t)(colon - start);
        } else {
            toklen = (size_t)(at - start);
        }

        uri->user = http_uri_decode_component(start, toklen);
        if (!uri->user)
            return -1;

        if (colon) {
            /* The colon was seen before '@', hence colon < at */
            toklen = (size_t)(at - colon - 1);
            if (toklen == 0) {
                http_set_error("empty password");
                return -1;
            }

            uri->password = http_uri_decode_component(colon + 1, toklen);
            if (!uri->password)
                return -1;
        }

        /* Skip '@' */
        ptr = at + 1;
    } else {
        /* Since we did not find a username, we backtrack to read the
         * host. */
        ptr = start;
    }

    /* Host */
    start = ptr;

    if (*start >= '0' && *start <= '9') {
        /* IPv4 address */
        for (;;) {
            if (*ptr == ':' || http_uri_parse_is_authority_end(*ptr)) {
                toklen = (size_t)(ptr - start);
                if (toklen == 0) {
                    http_set_error("empty host");
                    return -1;
                }

                uri->host = http_uri_decode_component(start, toklen);
                if (!uri->host)
                    return -1;

                break;
            } else if (!http_uri_is_ipv4_addr_char((unsigned char)*ptr)) {
                http_set_error("invalid character \\%hhu in ipv4 address",
                               (unsigned char)*ptr);
                return -1;
            }

            ptr++;
        }
    } else if (*start == '[') {
        ptr++; /* '[' */
        start = ptr;

        /* IPv6 address */
        for (;;) {
            if (*ptr == ']') {
                toklen = (size_t)(ptr - start);
                if (toklen == 0) {
                    http_set_error("empty host");
                    return -1;
                }

                uri->host = http_uri_decode_component(start, toklen);
                if (!uri->host)
                    return -1;

                ptr++; /* ']' */
                break;
            } else if (*ptr == '\0') {
                http_set_error("truncated ipv6 address");
                return -1;
            } else if (!http_uri_is_ipv6_addr_char((unsigned char)*ptr)) {
                http_set_error("invalid character \\%hhu in ipv6 address",
                               (unsigned char)*ptr);
                return -1;
            }

            ptr++;
        }
    } else {
        /* Hostname */
        for (;;) {
            if (*ptr == ':' || http_uri_parse_is_authority_end(*ptr)) {
                toklen = (size_t)(ptr - start);
                if (toklen == 0) {
                    http_set_error("empty host");
                    return -1;
                }

                uri->host = http_uri_decode_component(start, toklen);
                if (!uri->host)
                    return -1;

                break;
            }

            ptr++;
        }
    }

    /* Port (optional) */
    if (*ptr == ':') {
        ptr++;

        start = ptr;

        for (;;) {
            if (http_uri_parse_is_authority_end(*ptr)) {
                toklen = (size_t)(ptr - start);
                if (toklen == 0) {
                    http_set_error("empty port");
                    return -1;
                }

                uri->port = http_uri_decode_component(start, toklen);
                if (!uri->port)
                    return -1;

                break;
            } else if (!http_uri_is_port_char((unsigned char)*ptr)) {
                http_set_error("invalid character \\%hhu in port",
                               (unsigned char)*ptr);
                return -1;
            }

            ptr++;
        }
    }

    /* Path (optional, default '/') */
path:
    if (*ptr == '/') {
        start = ptr;

        for (;;) {
            if (*ptr == '?' || *ptr == '#' || *ptr == '\0') {
                toklen = (size_t)(ptr - start);
                uri->path = http_uri_decode_component(start, toklen);
                if (!uri->path)
                    return -1;

                break;
            }

            ptr++;
        }
    } else {
        uri->path = http_strdup("/");
        if (!uri->path)
            return -1;
    }

    /* Query (optional) */
    if (*ptr == '?') {
        char *query;

        ptr++;

        start = ptr;

        while (*ptr != '#' && *ptr != '\0')
            ptr++;

        toklen = (size_t)(ptr - start);

        /* Temporary copy of the raw query handed to the parameter parser */
        query = http_strndup(start, toklen);
        if (!query)
            return -1;

        if (http_query_parameters_parse(query, &uri->query_parameters,
                                        &uri->nb_query_parameters) == -1) {
            http_free(query);
            return -1;
        }

        http_free(query);
    }

    /* Fragment (optional) */
    if (*ptr == '#') {
        ptr++;

        start = ptr;

        while (*ptr != '\0')
            ptr++;

        toklen = (size_t)(ptr - start);
        uri->fragment = http_uri_decode_component(start, toklen);
        if (!uri->fragment)
            return -1;
    }

    if (http_uri_finalize(uri) == -1)
        return -1;

    return 1;
}
struct http_url * http_url_parse(const char *string) { struct http_url *url; const char *ptr, *start, *end, *at, *colon; size_t toklen; url = http_url_new(); ptr = string; #define HTTP_FAIL(fmt_, ...) \ do { \ c_set_error(fmt_, ##__VA_ARGS__); \ goto error; \ } while (0) /* Scheme */ if (ptr[0] == '/') { if (ptr[1] == '/') { goto authority; } else { goto path; } } start = ptr; if (!(http_url_is_scheme_first_char(*ptr))) HTTP_FAIL("invalid first character in scheme"); for (;;) { if (*ptr == '\0' || *ptr == ':') { toklen = (size_t)(ptr - start); if (toklen == 0) HTTP_FAIL("empty scheme"); url->scheme = c_strndup(start, toklen); break; } else if (!http_url_is_scheme_char(*ptr)) { HTTP_FAIL("invalid character in scheme"); } ptr++; } if (*ptr == ':') ptr++; authority: /* Authority */ if (ptr[0] != '/' || ptr[1] != '/') HTTP_FAIL("invalid characters after scheme"); ptr += 2; end = ptr + strcspn(ptr, "/?#"); at = strchr(ptr, '@'); if (at && at < end) { /* User */ colon = strchr(ptr, ':'); if (colon) { toklen = (size_t)(colon - ptr); } else { toklen = (size_t)(at - ptr); } url->user = http_url_userinfo_decode(ptr, toklen); if (!url->user) HTTP_FAIL("cannot decode user: %s", c_get_error()); if (colon) { /* Password */ toklen = (size_t)(at - colon - 1); url->password = http_url_userinfo_decode(colon + 1, toklen); if (!url->password) HTTP_FAIL("cannot decode password: %s", c_get_error()); } ptr = at + 1; } /* Host */ if (*ptr == '[') { /* IPv6 address */ ptr++; toklen = strcspn(ptr, "]"); } else { toklen = strcspn(ptr, ":/?#"); } url->host = http_url_host_decode(ptr, toklen); if (!url->host) HTTP_FAIL("cannot decode host: %s", c_get_error()); ptr += toklen; if (*ptr == ']') ptr++; if (*ptr == ':') { size_t port_sz; ptr++; /* Port */ toklen = strcspn(ptr, "/?#"); if (toklen == 0) HTTP_FAIL("empty port number"); url->port = c_strndup(ptr, toklen); if (c_parse_u16(url->port, &url->port_number, &port_sz) == -1) HTTP_FAIL("invalid port number: %s", c_get_error()); if (port_sz != 
strlen(url->port)) HTTP_FAIL("invalid trailing data after port number"); if (url->port_number == 0) HTTP_FAIL("invalid port number"); ptr += toklen; } path: if (*ptr == '/') { /* Path */ toklen = strcspn(ptr, "?#"); url->path = http_url_path_decode(ptr, toklen); if (!url->path) HTTP_FAIL("cannot decode path: %s", c_get_error()); ptr += toklen; } if (*ptr == '?') { ptr++; /* Query */ toklen = strcspn(ptr, "#"); url->query = c_strndup(ptr, toklen); if (!url->query) HTTP_FAIL("cannot decode query: %s", c_get_error()); url->query_parameters = http_query_parameters_parse(url->query); if (!url->query_parameters) HTTP_FAIL("cannot parse query parameters: %s", c_get_error()); ptr += toklen; } if (*ptr == '#') { ptr++; /* Fragment */ toklen = strlen(ptr); url->fragment = http_url_fragment_decode(ptr, toklen); if (!url->fragment) HTTP_FAIL("cannot decode fragment: %s", c_get_error()); ptr += toklen; } #undef HTTP_FAIL return url; error: http_url_delete(url); return NULL; }