/*
 * uri_read - SQL-callable function building a URI tuple from a text argument.
 *
 * Argument 0 is fetched as text; its data pointer and its length without the
 * varlena header are handed to uri_new() together with the call context.
 * The HeapTuple returned by uri_new() is returned as a Datum.
 */
Datum
uri_read(PG_FUNCTION_ARGS)
{
    text       *arg = PG_GETARG_TEXT_P(0);
    HeapTuple   tuple;

    tuple = uri_new(fcinfo, VARDATA(arg), VARSIZE_ANY_EXHDR(arg));

    /* The argument copy (if one was made) is released only after uri_new() has run. */
    PG_FREE_IF_COPY(arg, 0);

    PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
}
/* Parses a long, query-heavy URL and asserts that uri_parse() reports success. */
static void test_uri_parse_long(void)
{
    static const char input[] = "http://www.google.com/images?hl=en&client=firefox-a&hs=tldrls=org.mozilla:en-US:official&q=philippine+hijacked+bus+picturesum=1&ie=UTF-8&source=univ&ei=SC-_TLbjE5H2tgO70PHODA&sa=X&oi=image_result_group&ct=titleresnum=1&ved=0CCIQsAQwAAbiw=1239bih=622";
    uri_t *uri = uri_new();

    g_assert(uri_parse(uri, input, strlen(input), NULL));

    uri_free(uri);
}
/*
 * Parses an ad-server URL whose path contains square brackets ("[TPAS_ID]")
 * and asserts success. If the parser reports an error position it is logged
 * through g_test_message() before the assertion fires.
 */
static void test_uri_parse_brackets(void)
{
    static const char input[] = "http://ad.doubleclick.net/adi/N5371.Google/B4882217.2;sz=160x600;pc=[TPAS_ID];click=http://googleads.g.doubleclick.net/aclk?sa=l&ai=Bepsf-z83TfuWJIKUjQS46ejyAeqc0t8B2uvnkxeiro6LRdC9wQEQARgBIPjy_wE4AFC0-b7IAmDJ9viGyKOgGaABnoHS5QOyAQ53d3cub3NuZXdzLmNvbboBCjE2MHg2MDBfYXPIAQnaAS9odHRwOi8vd3d3Lm9zbmV3cy5jb20vdXNlci9qYWNrZWVibGV1L2NvbW1lbnRzL7gCGMgCosXLE6gDAegDrwLoA5EG6APgBfUDAgAARPUDIAAAAA&num=1&sig=AGiWqty7uE4ibhWIPcOiZlX0__AQkpGEWA&client=ca-pub-6467510223857492&adurl=;ord=410711259?";
    uri_t *uri = uri_new();
    const gchar *fail_pos = NULL;
    int parsed = uri_parse(uri, input, strlen(input), &fail_pos);

    if (fail_pos != NULL) {
        g_test_message("uri_parse failed at -> %s", fail_pos);
    }

    /* fail_pos points into the static input, so freeing the uri first is safe. */
    uri_free(uri);
    g_assert(parsed);
}
/* A path made only of repeated slashes must normalize to a single "/". */
static void test_uri_normalize_all_slashes(void)
{
    static const char input[] = "eXAMPLE://a//////";
    uri_t *uri = uri_new();

    g_assert(uri_parse(uri, input, strlen(input), NULL));
    uri_normalize(uri);

    g_assert_cmpstr("/", ==, uri->path);

    uri_free(uri);
}
/*
 * Parses a URL whose query contains '|' and '$' characters and asserts
 * success, logging the reported error position (if any) first.
 */
static void test_uri_parse_pipe(void)
{
    static const char input[] = "http://ads.pointroll.com/PortalServe/?pid=1048344U85520100615200820&flash=10time=3|18:36|-8redir=$CTURL$r=0.8149350655730814";
    uri_t *uri = uri_new();
    const gchar *fail_pos = NULL;
    int parsed = uri_parse(uri, input, strlen(input), &fail_pos);

    if (fail_pos != NULL) {
        g_test_message("uri_parse failed at -> %s", fail_pos);
    }

    uri_free(uri);
    g_assert(parsed);
}
/*
 * Parses a URL containing a truncated percent-escape ("%2&") and asserts
 * success. The uri is also run through uri_normalize() before being freed;
 * the parse result is only asserted afterwards.
 */
static void test_uri_parse_badencode(void)
{
    static const char input[] = "http://b.scorecardresearch.com/b?c1=2&c2=6035223rn=1404429288&c7=http%3A%2F%2Fdetnews.com%2Farticle%2F20110121%2FMETRO01%2F101210376%2FDetroit-women-get-no-help-in-arrest-of-alleged-car-thief&c8=Detroit%20women%20get%20no%20help%20in%20arrest%20of%20alleged%2&cv=2.2&cs=js";
    uri_t *uri = uri_new();
    const gchar *fail_pos = NULL;
    int parsed = uri_parse(uri, input, strlen(input), &fail_pos);

    if (fail_pos != NULL) {
        g_test_message("uri_parse failed at -> %s", fail_pos);
    }

    uri_normalize(uri);
    uri_free(uri);
    g_assert(parsed);
}
/*
 * Parses a URL containing "%%" template markers in the query and asserts
 * success. Normalization is exercised on the result before it is freed.
 */
static void test_uri_parse_double_percent(void)
{
    static const char input[] = "http://bh.contextweb.com/bh/getuid?url=http://image2.pubmatic.com/AdServer/Pug?vcode=bz0yJnR5cGU9MSZqcz0xJmNvZGU9ODI1JnRsPTQzMjAw&piggybackCookie=%%CWGUID%%,User_tokens:%%USER_TOKENS%%";
    uri_t *uri = uri_new();
    const gchar *fail_pos = NULL;
    int parsed = uri_parse(uri, input, strlen(input), &fail_pos);

    if (fail_pos != NULL) {
        g_test_message("uri_parse failed at -> %s", fail_pos);
    }

    uri_normalize(uri);
    uri_free(uri);
    g_assert(parsed);
}
/*
 * Parses a URL containing non-standard "%uXXXX" escapes in the query and
 * asserts success. Normalization is exercised on the result before it is freed.
 */
static void test_uri_parse_unicode_escape(void)
{
    static const char input[] = "http://b.scorecardresearch.com/b?C1=8&C2=6035047&C3=463.9924&C4=ad21868c&C5=173229&C6=16jfaue1ukmeoq&C7=http%3A//remotecontrol.mtv.com/2011/01/20/sammi-sweetheart-giancoloa-terrell-owens-hair/&C8=Hot%20Shots%3A%20Sammi%20%u2018Sweetheart%u2019%20Lets%20Terrell%20Owens%20Play%20With%20Her%20Hair%20%BB%20MTV%20Remote%20Control%20Blog&C9=&C10=1680x1050rn=58013009";
    uri_t *uri = uri_new();
    const gchar *fail_pos = NULL;
    int parsed = uri_parse(uri, input, strlen(input), &fail_pos);

    if (fail_pos != NULL) {
        g_test_message("uri_parse failed at -> %s", fail_pos);
    }

    uri_normalize(uri);
    uri_free(uri);
    g_assert(parsed);
}
/* A mixed-case host must come out of uri_normalize() in lower case. */
static void test_uri_normalize_host(void)
{
    static const char input[] = "eXAMPLE://ExAmPlE.CoM/";
    uri_t *uri = uri_new();

    g_assert(uri_parse(uri, input, strlen(input), NULL));
    uri_normalize(uri);

    g_assert_cmpstr("example.com", ==, uri->host);

    uri_free(uri);
}
static void test_uri_parse_many(void) { uri_t *u = uri_new(); const char uri1[] = "http://example.com/path/to/something?query=string#frag"; g_assert(uri_parse(u, uri1, strlen(uri1), NULL)); g_assert_cmpstr(u->scheme, ==, "http"); g_assert_cmpstr(u->host, ==, "example.com"); g_assert_cmpstr(u->path, ==, "/path/to/something"); g_assert_cmpstr(u->query, ==, "?query=string"); g_assert_cmpstr(u->fragment, ==, "#frag"); uri_clear(u); const char uri2[] = "http://*****:*****@example.com:5555/path/to/"; g_assert(uri_parse(u, uri2, strlen(uri2), NULL)); g_assert_cmpstr(u->scheme, ==, "http"); g_assert_cmpstr(u->userinfo, ==, "jason:password"); g_assert_cmpstr(u->host, ==, "example.com"); g_assert(u->port == 5555); g_assert_cmpstr(u->path, ==, "/path/to/"); uri_clear(u); /* this should fail */ const char uri3[] = "http://baduri;f[303fds"; const char *error_at = NULL; g_assert(uri_parse(u, uri3, strlen(uri3), &error_at) == 0); g_assert(error_at != NULL); g_assert_cmpstr("[303fds", ==, error_at); uri_clear(u); const char uri4[] = "https://example.com:23999"; g_assert(uri_parse(u, uri4, strlen(uri4), &error_at)); g_assert_cmpstr(u->scheme, ==, "https"); g_assert_cmpstr(u->host, ==, "example.com"); g_assert(u->port == 23999); /* TODO: maybe make empty path == NULL? */ g_assert_cmpstr(u->path, ==, ""); g_assert(u->query == NULL); g_assert(u->fragment == NULL); g_assert(error_at == NULL); uri_clear(u); const char uri5[] = "svn+ssh://jason:[email protected]:22/thing/and/stuff"; g_assert(uri_parse(u, uri5, strlen(uri5), &error_at)); g_assert_cmpstr(u->scheme, ==, "svn+ssh"); g_assert_cmpstr(u->userinfo, ==, "jason:password"); g_assert_cmpstr(u->host, ==, "example.com"); g_assert(u->port == 22); g_assert_cmpstr(u->path, ==, "/thing/and/stuff"); g_assert(error_at == NULL); uri_clear(u); uri_free(u); }
/*
 * Parses a URI with mixed case, dot segments and percent-escapes, normalizes
 * it, and checks the string produced by uri_compose(). The composed string
 * is released with free().
 */
static void test_uri_compose(void)
{
    static const char input[] = "eXAMPLE://ExAmPlE.CoM/foo/../boo/%25hi%0b/.t%65st/./this?query=string#frag";
    uri_t *uri = uri_new();
    char *composed;

    g_assert(uri_parse(uri, input, strlen(input), NULL));
    uri_normalize(uri);

    composed = uri_compose(uri);
    g_assert_cmpstr("example://example.com/boo/%25hi%0B/.test/this?query=string#frag", ==, composed);

    free(composed);
    uri_free(uri);
}
/*
 * An authority with no path and one with a bare "/" path must both
 * normalize to the path "/". The same uri_t is reused for both inputs.
 */
static void test_uri_normalize_one_slash(void)
{
    static const char without_slash[] = "eXAMPLE://a";
    static const char with_slash[] = "eXAMPLE://a/";
    uri_t *uri = uri_new();

    g_assert(uri_parse(uri, without_slash, strlen(without_slash), NULL));
    uri_normalize(uri);
    g_assert_cmpstr("/", ==, uri->path);

    uri_clear(uri);

    g_assert(uri_parse(uri, with_slash, strlen(with_slash), NULL));
    uri_normalize(uri);
    g_assert_cmpstr("/", ==, uri->path);

    uri_free(uri);
}
Uri *uri_parse(const char *uriStr) { Uri *uri = uri_new(); unsigned int protocolEnd = 0; unsigned int hostEnd = 0; for (unsigned int index = 1; index < strlen(uriStr); index++) { // We have found a protocol. if (uriStr[index] == ':') { uri->protocol = malloc(index + 1); protocolEnd = index + 3; strncpy(uri->protocol, uriStr, index); uri->protocol[index] = '\0'; break; } } for (unsigned int index = 1; index + protocolEnd < strlen(uriStr); index++) { if (uriStr[index + protocolEnd - 1] != '/' && uriStr[index + protocolEnd] == '/') { uri->host = malloc(index + 1); hostEnd = index + protocolEnd; strncpy(uri->host, uriStr + protocolEnd, index); uri->host[index] = '\0'; break; } } uri->path = malloc(strlen(uriStr) - hostEnd + 1); strcpy(uri->path, uriStr + hostEnd); return uri; }
/*
 * Checks path normalization on three inputs using one reused uri_t:
 * dot-segment removal combined with percent-escape handling, a leading
 * "../" directly under the root, and a leading "./" directly under the root.
 */
static void test_uri_normalize(void)
{
    static const char dots_and_escapes[] = "eXAMPLE://a/./b/../b/%63/%7bfoo%7d";
    static const char parent_of_root[] = "http://host/../";
    static const char current_dir[] = "http://host/./";
    uri_t *uri = uri_new();

    g_assert(uri_parse(uri, dots_and_escapes, strlen(dots_and_escapes), NULL));
    uri_normalize(uri);
    g_assert_cmpstr("/b/c/%7Bfoo%7D", ==, uri->path);
    uri_clear(uri);

    g_assert(uri_parse(uri, parent_of_root, strlen(parent_of_root), NULL));
    uri_normalize(uri);
    g_assert_cmpstr(uri->path, ==, "/");
    uri_clear(uri);

    g_assert(uri_parse(uri, current_dir, strlen(current_dir), NULL));
    uri_normalize(uri);
    g_assert_cmpstr("/", ==, uri->path);

    uri_free(uri);
}
static void test_uri_transform(void) { /* examples from http://tools.ietf.org/html/rfc3986#section-5.4.1 */ static const char base_uri[] = "http://a/b/c/d;p?q"; char *s; uri_t *b = uri_new(); uri_t *t = uri_new(); uri_t *r = uri_new(); g_assert(uri_parse(b, base_uri, strlen(base_uri), NULL)); uri_set_path(r, "g", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/g", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "./g", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/g", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "g/", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/g/", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "/g", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/g", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_host(r, "g", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://g", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_query(r, "?y", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/d;p?y", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "g", -1); uri_set_query(r, "?y", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/g?y", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_fragment(r, "#s", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/d;p?q#s", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "g", -1); uri_set_fragment(r, "#s", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/g#s", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "g", -1); uri_set_query(r, "?y", -1); uri_set_fragment(r, "#s", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/g?y#s", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, ";x", -1); uri_transform(b, r, t); s = uri_compose(t); 
g_assert_cmpstr("http://a/b/c/;x", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "g;x", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/g;x", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "g;x", -1); uri_set_query(r, "?y", -1); uri_set_fragment(r, "#s", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/g;x?y#s", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/d;p?q", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, ".", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "./", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "..", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "../g", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/g", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "../..", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "../../", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "../../g", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/g", ==, s); free(s); uri_clear(r); uri_clear(t); /* abnormal examples */ uri_set_path(r, "../../../g", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/g", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "../../../../g", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/g", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, 
"/./g", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/g", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "/../g", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/g", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "g.", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/g.", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, ".g", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/.g", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "g..", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/g..", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "..g", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/..g", ==, s); free(s); uri_clear(r); uri_clear(t); /* nonsensical */ uri_set_path(r, "./../g", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/g", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "./g/.", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/g/", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "g/./h", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/g/h", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "g/../h", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/h", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "g;x=1/./y", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/g;x=1/y", ==, s); free(s); uri_clear(r); uri_clear(t); uri_set_path(r, "g;x=1/../y", -1); uri_transform(b, r, t); s = uri_compose(t); g_assert_cmpstr("http://a/b/c/y", ==, s); free(s); uri_clear(r); uri_clear(t); uri_free(r); uri_free(t); uri_free(b); }
void *handle_connection(void *arg) { st_netfd_t client_nfd = (st_netfd_t)arg; struct http_stream *s = http_stream_create(HTTP_SERVER, SEC2USEC(5)); char buf[4*1024]; int error = 0; struct http_stream *cs = NULL; uri_t *u = uri_new(); int should_close = 1; for (;;) { should_close = 1; if (s->status != HTTP_STREAM_OK) break; cs = NULL; error = 0; s->timeout = SEC2USEC(5); int status = http_stream_request_read(s, client_nfd); s->timeout = SEC2USEC(30); // longer timeout for the rest if (status != HTTP_STREAM_OK) { if (s->status == HTTP_STREAM_CLOSED || s->status == HTTP_STREAM_TIMEOUT) { error = 1; } else { error = 400; } goto release; } cs = http_stream_create(HTTP_CLIENT, SEC2USEC(30)); //http_request_debug_print(s->req); fprintf(stderr, "request uri: %s\n", s->req->uri); const char *error_at = NULL; uri_clear(u); if (uri_parse(u, s->req->uri, strlen(s->req->uri), &error_at) == 0) { fprintf(stderr, "uri_parse error: %s\n", error_at); error = 400; goto release; } uri_normalize(u); if (http_stream_connect(cs, u->host, u->port) != HTTP_STREAM_OK) { error = 504; goto release; } http_request_header_remove(s->req, "Accept-Encoding"); http_request_header_remove(s->req, "Proxy-Connection"); /* TODO: need to expose a copy api for http message */ http_request_t *tmp_req = cs->req; cs->req = s->req; char *request_uri = uri_compose_partial(u); char *tmp_uri = s->req->uri; cs->req->uri = request_uri; if (http_stream_request_send(cs) != HTTP_STREAM_OK) { error = 504; goto release; } cs->req = tmp_req; s->req->uri = tmp_uri; free(request_uri); /* TODO: fix this. post might not contain data. 
probably move this logic into stream */ size_t total = 0; if (g_strcmp0("POST", s->req->method) == 0) { for (;;) { ssize_t nr = sizeof(buf); status = http_stream_read(s, buf, &nr); fprintf(stderr, "server http_stream_read nr: %zd\n", nr); if (nr < 0 || status != HTTP_STREAM_OK) { error = 1; goto release; } if (nr == 0) break; /*fwrite(buf, sizeof(char), nr, stdout);*/ ssize_t nw = st_write(cs->nfd, buf, nr, s->timeout); if (nw != nr) { error=1; goto release; } fprintf(stderr, "st_write nw: %zd\n", nr); total += nr; } fprintf(stderr, "http_stream_read total: %zu\n", total); } if (http_stream_response_read(cs) != HTTP_STREAM_OK) { error = 502; goto release; } /* TODO: properly create a new response and copy headers */ http_response_t *tmp_resp = s->resp; s->resp = cs->resp; s->resp->http_version = "HTTP/1.1"; http_response_header_remove(s->resp, "Content-Length"); http_response_header_remove(s->resp, "Transfer-Encoding"); if (s->resp->status_code != 204) http_response_header_append(s->resp, "Transfer-Encoding", "chunked"); ssize_t nw = http_stream_response_send(s, 0); s->resp = tmp_resp; fprintf(stderr, "http_stream_response_send: %zd\n", nw); if (s->resp->status_code != 204 && (cs->content_size > 0 || cs->transfer_encoding == TE_CHUNKED)) { total = 0; fprintf(stderr, "content size: %zd\n", cs->content_size); for (;;) { ssize_t nr = sizeof(buf); status = http_stream_read(cs, buf, &nr); fprintf(stderr, "client http_stream_read nr: %zd\n", nr); if (nr <= 0 || status != HTTP_STREAM_OK) break; /*fwrite(buf, sizeof(char), nr, stdout);*/ total += nr; if (http_stream_send_chunk(s, buf, nr) != HTTP_STREAM_OK) break; } fprintf(stderr, "written to client: %zu\n", total); if (total > 0 && s->status == HTTP_STREAM_OK) { http_stream_send_chunk_end(s); } else { fprintf(stderr, "for request: %s status: %d\n", s->req->uri, s->status); } } release: if (!error) { if ((g_strcmp0("HTTP/1.1", s->req->http_version) == 0) && (g_strcmp0(http_request_header_getstr(s->req, "Connection"), 
"close") != 0)) { // if HTTP/1.1 client and no Connection: close, then don't close should_close = 0; } else if (g_strcmp0(http_request_header_getstr(s->req, "Connection"), "keepalive") == 0) { should_close = 0; } } http_request_clear(s->req); uri_clear(u); if (cs) http_stream_close(cs); /* TODO: break loop if HTTP/1.0 and not keep-alive */ if (error) { fprintf(stderr, "ERROR: %d STATUS: %d, exiting\n", error, s->status); /* TODO: use reason string */ if (error >= 400 && s->status != HTTP_STREAM_CLOSED) { http_response_free(s->resp); s->resp = http_response_new(error, "Error"); http_response_header_append(s->resp, "Content-Length", "0"); s->status = HTTP_STREAM_OK; /* TODO: might want to move this logic into http_stream */ http_stream_response_send(s, 0); } break; } if (should_close) break; } fprintf(stderr, "exiting handle_connection (should_close: %u)\n", should_close); uri_free(u); http_stream_close(s); return NULL; }
Datum uri_in(PG_FUNCTION_ARGS) { const char *str = PG_GETARG_CSTRING(0); PG_RETURN_DATUM(HeapTupleGetDatum(uri_new(fcinfo, str, strlen(str)))); }