Ejemplo n.º 1
0
/*
 * Parse the URI string `str` into `uri`.
 *
 * `uri` is zeroed first and then filled component by component: scheme,
 * user, password, host, port, path, query parameters and fragment. A string
 * starting with '/' is treated as a path-only reference, in which case the
 * scheme and authority are skipped. When an authority is present but no path
 * follows it, the path is set to "/".
 *
 * The authority ends at the first '/', '?' or '#' (or at the end of the
 * string), and the user information is split into user and password at its
 * first ':'.
 *
 * Returns 1 on success and -1 on failure. Syntax errors detected here are
 * reported with http_set_error(). On failure the components already stored
 * in `uri` are left in place; this function does not release them.
 */
static int
http_uri_parse(const char *str, struct http_uri *uri) {
    const char *ptr, *start, *colon, *at;
    size_t toklen;

    memset(uri, 0, sizeof(struct http_uri));

    ptr = str;

    if (*ptr == '\0') {
        http_set_error("empty string");
        return -1;
    }

    /* Scheme */
    start = ptr;

    if (*ptr == '/') {
        /* Path-only reference: there is no scheme and no authority. */
        goto path;
    } else if (!(*ptr >= 'a' && *ptr <= 'z')
            && !(*ptr >= 'A' && *ptr <= 'Z')
            && *ptr != '%') {
        http_set_error("invalid first character \\%hhu in scheme",
                       (unsigned char)*ptr);
        return -1;
    }

    for (;;) {
        if (*ptr == ':' || *ptr == '\0') {
            toklen = (size_t)(ptr - start);
            uri->scheme = http_uri_decode_component(start, toklen);
            if (!uri->scheme)
                return -1;

            break;
        } else if (!http_uri_is_scheme_char((unsigned char)*ptr)
                && *ptr != '%') {
            http_set_error("invalid character \\%hhu in scheme",
                           (unsigned char)*ptr);
            return -1;
        }

        ptr++;
    }

    /* Skip '://'. The short-circuit evaluation guarantees that we never read
     * past the terminating NUL byte. */
    if (ptr[0] != ':' || ptr[1] != '/' || ptr[2] != '/') {
        http_set_error("invalid characters after scheme");
        return -1;
    }

    ptr += 3;

    /* User information (optional).
     *
     * Look for a '@' inside the authority only: the scan stops at the first
     * '/', '?' or '#' so that a '@' located in the path, query or fragment
     * is not mistaken for the user information delimiter. Only the first
     * ':' is recorded, any further ':' belongs to the password. */
    start = ptr;
    colon = NULL;
    at = NULL;
    while (*ptr != '\0' && *ptr != '/' && *ptr != '?' && *ptr != '#') {
        if (*ptr == '@') {
            at = ptr;
            break;
        } else if (*ptr == ':' && !colon) {
            colon = ptr;
        }

        ptr++;
    }

    if (at) {
        /* User */
        if (colon) {
            toklen = (size_t)(colon - start);
        } else {
            toklen = (size_t)(at - start);
        }

        uri->user = http_uri_decode_component(start, toklen);
        if (!uri->user)
            return -1;

        /* Password (optional) */
        if (colon) {
            toklen = (size_t)(at - colon - 1);
            if (toklen == 0) {
                http_set_error("empty password");
                return -1;
            }

            uri->password = http_uri_decode_component(colon + 1, toklen);
            if (!uri->password)
                return -1;
        }

        /* Skip '@' */
        ptr = at + 1;
    } else {
        /* Since we did not find a username, we backtrack to read the host. */
        ptr = start;
    }

    /* Host */
    start = ptr;
    if (*start >= '0' && *start <= '9') {
        /* IPv4 address */
        for (;;) {
            if (*ptr == '/' || *ptr == ':' || *ptr == '?' || *ptr == '#'
                    || *ptr == '\0') {
                toklen = (size_t)(ptr - start);
                if (toklen == 0) {
                    http_set_error("empty host");
                    return -1;
                }

                uri->host = http_uri_decode_component(start, toklen);
                if (!uri->host)
                    return -1;

                break;
            } else if (!http_uri_is_ipv4_addr_char((unsigned char)*ptr)) {
                http_set_error("invalid character \\%hhu in ipv4 address",
                               (unsigned char)*ptr);
                return -1;
            }

            ptr++;
        }
    } else if (*start == '[') {
        ptr++; /* '[' */
        start = ptr;

        /* IPv6 address */
        for (;;) {
            if (*ptr == ']') {
                toklen = (size_t)(ptr - start);
                if (toklen == 0) {
                    http_set_error("empty host");
                    return -1;
                }

                uri->host = http_uri_decode_component(start, toklen);
                if (!uri->host)
                    return -1;

                ptr++; /* ']' */

                break;
            } else if (*ptr == '\0') {
                http_set_error("truncated ipv6 address");
                return -1;
            } else if (!http_uri_is_ipv6_addr_char((unsigned char)*ptr)) {
                http_set_error("invalid character \\%hhu in ipv6 address",
                               (unsigned char)*ptr);
                return -1;
            }

            ptr++;
        }
    } else {
        /* Hostname */
        for (;;) {
            if (*ptr == '/' || *ptr == ':' || *ptr == '?' || *ptr == '#'
                    || *ptr == '\0') {
                toklen = (size_t)(ptr - start);
                if (toklen == 0) {
                    http_set_error("empty host");
                    return -1;
                }

                uri->host = http_uri_decode_component(start, toklen);
                if (!uri->host)
                    return -1;

                break;
            }

            ptr++;
        }
    }

    /* Port (optional) */
    if (*ptr == ':') {
        ptr++;

        start = ptr;

        for (;;) {
            if (*ptr == '/' || *ptr == '?' || *ptr == '#' || *ptr == '\0') {
                toklen = (size_t)(ptr - start);
                if (toklen == 0) {
                    http_set_error("empty port");
                    return -1;
                }

                uri->port = http_uri_decode_component(start, toklen);
                if (!uri->port)
                    return -1;

                break;
            } else if (!http_uri_is_port_char((unsigned char)*ptr)) {
                http_set_error("invalid character \\%hhu in port",
                               (unsigned char)*ptr);
                return -1;
            }

            ptr++;
        }
    }

    /* Path (optional, default '/') */
path:
    if (*ptr == '/') {
        start = ptr;

        for (;;) {
            if (*ptr == '?' || *ptr == '#' || *ptr == '\0') {
                toklen = (size_t)(ptr - start);
                uri->path = http_uri_decode_component(start, toklen);
                if (!uri->path)
                    return -1;

                break;
            }

            ptr++;
        }
    } else {
        uri->path = http_strdup("/");
        if (!uri->path)
            return -1;
    }

    /* Query (optional) */
    if (*ptr == '?') {
        char *query;

        ptr++;

        start = ptr;

        while (*ptr != '#' && *ptr != '\0')
            ptr++;

        /* The query parser is given its own NUL-terminated copy of the
         * query, which is released as soon as parsing is done. */
        toklen = (size_t)(ptr - start);
        query = http_strndup(start, toklen);
        if (!query)
            return -1;

        if (http_query_parameters_parse(query,
                                        &uri->query_parameters,
                                        &uri->nb_query_parameters) == -1) {
            http_free(query);
            return -1;
        }

        http_free(query);
    }

    /* Fragment (optional) */
    if (*ptr == '#') {
        ptr++;

        start = ptr;

        while (*ptr != '\0')
            ptr++;

        toklen = (size_t)(ptr - start);
        uri->fragment = http_uri_decode_component(start, toklen);
        if (!uri->fragment)
            return -1;
    }

    if (http_uri_finalize(uri) == -1)
        return -1;

    return 1;
}
Ejemplo n.º 2
0
Archivo: url.c Proyecto: galdor/libhttp
/*
 * Parse `string` as a URL and return a new http_url object.
 *
 * Three forms are accepted:
 *   - "scheme://authority[/path][?query][#fragment]";
 *   - "//authority[/path][?query][#fragment]" (no scheme);
 *   - "/path[?query][#fragment]" (no scheme and no authority).
 *
 * The authority is "[user[:password]@]host[:port]"; an IPv6 host must be
 * enclosed in square brackets. The port must be a non-zero 16 bit integer.
 *
 * Returns NULL on failure. Errors detected here are recorded with
 * c_set_error() and the partially built object is released with
 * http_url_delete() before returning.
 *
 * NOTE(review): the returned object presumably has to be released by the
 * caller with http_url_delete(); confirm against the public API.
 */
struct http_url *
http_url_parse(const char *string) {
    struct http_url *url;
    const char *ptr, *start, *end, *at, *colon, *bracket;
    size_t toklen;

    url = http_url_new();
    if (!url)
        return NULL;

    ptr = string;

#define HTTP_FAIL(fmt_, ...)              \
    do {                                  \
        c_set_error(fmt_, ##__VA_ARGS__); \
        goto error;                       \
    } while (0)

    /* Scheme */
    if (ptr[0] == '/') {
        if (ptr[1] == '/') {
            goto authority;
        } else {
            goto path;
        }
    }

    start = ptr;
    if (!(http_url_is_scheme_first_char(*ptr)))
        HTTP_FAIL("invalid first character in scheme");
    for (;;) {
        if (*ptr == '\0' || *ptr == ':') {
            toklen = (size_t)(ptr - start);
            if (toklen == 0)
                HTTP_FAIL("empty scheme");
            url->scheme = c_strndup(start, toklen);
            if (!url->scheme)
                HTTP_FAIL("cannot copy scheme: %s", c_get_error());
            break;
        } else if (!http_url_is_scheme_char(*ptr)) {
            HTTP_FAIL("invalid character in scheme");
        }

        ptr++;
    }

    if (*ptr == ':')
        ptr++;

authority:
    /* Authority */
    if (ptr[0] != '/' || ptr[1] != '/')
        HTTP_FAIL("invalid characters after scheme");
    ptr += 2;

    /* The authority stops at the beginning of the path, query or fragment;
     * every delimiter lookup below is bounded by `end`. */
    end = ptr + strcspn(ptr, "/?#");

    at = memchr(ptr, '@', (size_t)(end - ptr));
    if (at) {
        /* User
         *
         * The ':' separating the user from the password must be looked up
         * before the '@' only: a ':' found after it introduces the port (or
         * is part of an IPv6 address) and would yield a negative password
         * length. */
        colon = memchr(ptr, ':', (size_t)(at - ptr));

        if (colon) {
            toklen = (size_t)(colon - ptr);
        } else {
            toklen = (size_t)(at - ptr);
        }

        url->user = http_url_userinfo_decode(ptr, toklen);
        if (!url->user)
            HTTP_FAIL("cannot decode user: %s", c_get_error());

        if (colon) {
            /* Password */
            toklen = (size_t)(at - colon - 1);

            url->password = http_url_userinfo_decode(colon + 1, toklen);
            if (!url->password)
                HTTP_FAIL("cannot decode password: %s", c_get_error());
        }

        ptr = at + 1;
    }

    /* Host */
    if (*ptr == '[') {
        /* IPv6 address */

        ptr++;

        bracket = memchr(ptr, ']', (size_t)(end - ptr));
        if (!bracket)
            HTTP_FAIL("truncated ipv6 address");

        toklen = (size_t)(bracket - ptr);
    } else {
        toklen = strcspn(ptr, ":/?#");
    }

    url->host = http_url_host_decode(ptr, toklen);
    if (!url->host)
        HTTP_FAIL("cannot decode host: %s", c_get_error());

    ptr += toklen;
    if (*ptr == ']')
        ptr++;

    if (*ptr == ':') {
        size_t port_sz;

        ptr++;

        /* Port */
        toklen = strcspn(ptr, "/?#");
        if (toklen == 0)
            HTTP_FAIL("empty port number");

        url->port = c_strndup(ptr, toklen);
        if (!url->port)
            HTTP_FAIL("cannot copy port: %s", c_get_error());

        /* The whole token must be consumed by the integer parser. */
        if (c_parse_u16(url->port, &url->port_number, &port_sz) == -1)
            HTTP_FAIL("invalid port number: %s", c_get_error());
        if (port_sz != strlen(url->port))
            HTTP_FAIL("invalid trailing data after port number");
        if (url->port_number == 0)
            HTTP_FAIL("invalid port number");

        ptr += toklen;
    }

path:
    if (*ptr == '/') {
        /* Path */
        toklen = strcspn(ptr, "?#");
        url->path = http_url_path_decode(ptr, toklen);
        if (!url->path)
            HTTP_FAIL("cannot decode path: %s", c_get_error());

        ptr += toklen;
    }

    if (*ptr == '?') {
        ptr++;

        /* Query: the raw string is kept and also split into parameters. */
        toklen = strcspn(ptr, "#");
        url->query = c_strndup(ptr, toklen);
        if (!url->query)
            HTTP_FAIL("cannot decode query: %s", c_get_error());

        url->query_parameters = http_query_parameters_parse(url->query);
        if (!url->query_parameters)
            HTTP_FAIL("cannot parse query parameters: %s", c_get_error());

        ptr += toklen;
    }

    if (*ptr == '#') {
        ptr++;

        /* Fragment: everything up to the end of the string. */
        toklen = strlen(ptr);
        url->fragment = http_url_fragment_decode(ptr, toklen);
        if (!url->fragment)
            HTTP_FAIL("cannot decode fragment: %s", c_get_error());
    }

#undef HTTP_FAIL

    return url;

error:
    http_url_delete(url);
    return NULL;
}