/* {{{ php_url_parse_ex
 * Split the first `length` bytes of `str` into the fields of a php_url
 * (scheme, user, pass, host, port, path, query, fragment).
 *
 * Each component that is found is copied with estrndup() and then passed to
 * php_replace_controlchars_ex(); components that are absent keep the zero
 * value they received from ecalloc().
 *
 * NOTE: several look-ahead tests below compare against '\0', so this version
 * expects str[length] to be a NUL terminator.
 *
 * Returns the newly allocated php_url. Returns NULL, after releasing whatever
 * was allocated so far, when:
 *   - a port is present but is not in the range 1..65535,
 *   - the text in port position after the host is longer than five characters,
 *   - a ':' in port position is followed by nothing at all, or
 *   - the host part is empty.
 */
PHPAPI php_url *php_url_parse_ex(char const *str, int length)
{
	char port_buf[6];
	php_url *ret = ecalloc(1, sizeof(php_url));
	char const *s, *e, *p, *pp, *ue;

	s = str;
	ue = s + length;

	/* parse scheme */
	if ((e = memchr(s, ':', length)) && (e - s)) {
		/* validate scheme */
		p = s;
		while (p < e) {
			/* scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] */
			/* ctype functions are undefined for negative values, hence the
			 * unsigned char casts (plain char may be signed). */
			if (!isalpha((unsigned char)*p) && !isdigit((unsigned char)*p) && *p != '+' && *p != '.' && *p != '-') {
				if (e + 1 < ue) {
					goto parse_port;
				} else {
					goto just_path;
				}
			}
			p++;
		}

		if (*(e + 1) == '\0') { /* only scheme is available */
			ret->scheme = estrndup(s, (e - s));
			php_replace_controlchars_ex(ret->scheme, (e - s));
			goto end;
		}

		/*
		 * certain schemas like mailto: and zlib: may not have any / after them
		 * this check ensures we support those.
		 */
		if (*(e + 1) != '/') {
			/* check if the data we get is a port this allows us to
			 * correctly parse things like a.com:80
			 */
			p = e + 1;
			while (isdigit((unsigned char)*p)) {
				p++;
			}

			if ((*p == '\0' || *p == '/') && (p - e) < 7) {
				goto parse_port;
			}

			ret->scheme = estrndup(s, (e - s));
			php_replace_controlchars_ex(ret->scheme, (e - s));

			/* skip "scheme:" and treat the remainder as a bare path */
			length -= ++e - s;
			s = e;
			goto just_path;
		} else {
			ret->scheme = estrndup(s, (e - s));
			php_replace_controlchars_ex(ret->scheme, (e - s));

			if (*(e + 2) == '/') {
				s = e + 3;
				/* sizeof("file") includes the terminator, so this is an
				 * exact, case-insensitive match on the whole scheme */
				if (!strncasecmp("file", ret->scheme, sizeof("file"))) {
					if (*(e + 3) == '/') {
						/* support windows drive letters as in:
						 * file:///c:/somedir/file.txt
						 *
						 * The bound matters: for "file:///" the byte at
						 * e + 5 lies beyond the NUL terminator.
						 */
						if (e + 5 < ue && *(e + 5) == ':') {
							s = e + 4;
						}
						goto nohost;
					}
				}
			} else {
				if (!strncasecmp("file", ret->scheme, sizeof("file"))) {
					s = e + 1;
					goto nohost;
				} else {
					length -= ++e - s;
					s = e;
					goto just_path;
				}
			}
		}
	} else if (e) {
		/* no scheme; starts with colon: look for port */
		parse_port:
		p = e + 1;
		pp = p;

		while (pp - p < 6 && isdigit((unsigned char)*pp)) {
			pp++;
		}

		if (pp - p > 0 && pp - p < 6 && (*pp == '/' || *pp == '\0')) {
			long port;

			/* at most 5 digits, so the copy plus terminator fits port_buf */
			memcpy(port_buf, p, (pp - p));
			port_buf[pp - p] = '\0';
			port = strtol(port_buf, NULL, 10);
			if (port > 0 && port <= 65535) {
				ret->port = (unsigned short) port;
			} else {
				if (ret->scheme) efree(ret->scheme);
				efree(ret);
				return NULL;
			}
		} else if (p == pp && *pp == '\0') {
			/* ':' was the last character */
			if (ret->scheme) efree(ret->scheme);
			efree(ret);
			return NULL;
		} else if (*s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
			s += 2;
		} else {
			goto just_path;
		}
	} else if (*s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
		s += 2;
	} else {
		just_path:
		ue = s + length;
		goto nohost;
	}

	/* e = end of the authority part: first '/', else first of '?' / '#',
	 * else end of input */
	e = ue;

	if (!(p = memchr(s, '/', (ue - s)))) {
		char const *query, *fragment;

		query = memchr(s, '?', (ue - s));
		fragment = memchr(s, '#', (ue - s));

		if (query && fragment) {
			if (query > fragment) {
				e = fragment;
			} else {
				e = query;
			}
		} else if (query) {
			e = query;
		} else if (fragment) {
			e = fragment;
		}
	} else {
		e = p;
	}

	/* check for login and password */
	if ((p = zend_memrchr(s, '@', (e - s)))) {
		if ((pp = memchr(s, ':', (p - s)))) {
			if ((pp - s) > 0) {
				ret->user = estrndup(s, (pp - s));
				php_replace_controlchars_ex(ret->user, (pp - s));
			}

			pp++;
			if (p - pp > 0) {
				ret->pass = estrndup(pp, (p - pp));
				php_replace_controlchars_ex(ret->pass, (p - pp));
			}
		} else {
			ret->user = estrndup(s, (p - s));
			php_replace_controlchars_ex(ret->user, (p - s));
		}

		s = p + 1;
	}

	/* check for port */
	if (*s == '[' && *(e - 1) == ']') {
		/* Short circuit portscan,
		 * we're dealing with an
		 * IPv6 embedded address */
		p = s;
	} else {
		/* memrchr is a GNU specific extension
		 * Emulate for wide compatibility */
		for (p = e; p >= s && *p != ':'; p--);
	}

	if (p >= s && *p == ':') {
		/* a port found earlier (parse_port) wins; then p stays on the ':'
		 * so that the host ends there */
		if (!ret->port) {
			p++;
			if (e - p > 5) { /* port cannot be longer then 5 characters */
				if (ret->scheme) efree(ret->scheme);
				if (ret->user) efree(ret->user);
				if (ret->pass) efree(ret->pass);
				efree(ret);
				return NULL;
			} else if (e - p > 0) {
				long port;

				memcpy(port_buf, p, (e - p));
				port_buf[e - p] = '\0';
				port = strtol(port_buf, NULL, 10);
				if (port > 0 && port <= 65535) {
					ret->port = (unsigned short)port;
				} else {
					if (ret->scheme) efree(ret->scheme);
					if (ret->user) efree(ret->user);
					if (ret->pass) efree(ret->pass);
					efree(ret);
					return NULL;
				}
			}
			p--;
		}
	} else {
		p = e;
	}

	/* check if we have a valid host, if we don't reject the string as url */
	if ((p - s) < 1) {
		if (ret->scheme) efree(ret->scheme);
		if (ret->user) efree(ret->user);
		if (ret->pass) efree(ret->pass);
		efree(ret);
		return NULL;
	}

	ret->host = estrndup(s, (p - s));
	php_replace_controlchars_ex(ret->host, (p - s));

	if (e == ue) {
		return ret;
	}

	s = e;

	nohost:

	if ((p = memchr(s, '?', (ue - s)))) {
		/* bounded search: strchr() would ignore ue and stop at the first
		 * NUL instead, unlike the '?' search above */
		pp = memchr(s, '#', (ue - s));

		if (pp && pp < p) {
			/* '#' comes first: the '?' belongs to the fragment */
			if (pp - s) {
				ret->path = estrndup(s, (pp - s));
				php_replace_controlchars_ex(ret->path, (pp - s));
			}
			p = pp;
			goto label_parse;
		}

		if (p - s) {
			ret->path = estrndup(s, (p - s));
			php_replace_controlchars_ex(ret->path, (p - s));
		}

		if (pp) {
			if (pp - ++p) {
				ret->query = estrndup(p, (pp - p));
				php_replace_controlchars_ex(ret->query, (pp - p));
			}
			p = pp;
			goto label_parse;
		} else if (++p - ue) {
			ret->query = estrndup(p, (ue - p));
			php_replace_controlchars_ex(ret->query, (ue - p));
		}
	} else if ((p = memchr(s, '#', (ue - s)))) {
		if (p - s) {
			ret->path = estrndup(s, (p - s));
			php_replace_controlchars_ex(ret->path, (p - s));
		}

		label_parse:
		/* p is on the '#'; everything after it is the fragment */
		p++;

		if (ue - p) {
			ret->fragment = estrndup(p, (ue - p));
			php_replace_controlchars_ex(ret->fragment, (ue - p));
		}
	} else {
		ret->path = estrndup(s, (ue - s));
		php_replace_controlchars_ex(ret->path, (ue - s));
	}
end:
	return ret;
}
/* {{{ php_url_parse_ex
 * Split the `length` bytes starting at `str` into the fields of a php_url
 * (scheme, user, pass, host, port, path, query, fragment).
 *
 * Each component that is found is copied with estrndup() and then passed to
 * php_replace_controlchars_ex(); components that are absent keep the zero
 * value they received from ecalloc(). Look-ahead reads are guarded by
 * comparisons against ue (= str + length) rather than by a NUL terminator.
 *
 * Returns the newly allocated php_url. Returns NULL, after releasing whatever
 * was allocated so far, when:
 *   - a port is present but is not in the range 1..65535,
 *   - the text in port position after the host is longer than five characters,
 *   - a ':' in port position is the last byte of the input, or
 *   - the host part is empty.
 */
PHPAPI php_url *php_url_parse_ex(char const *str, size_t length)
{
	char port_buf[6];
	php_url *ret = ecalloc(1, sizeof(php_url));
	char const *s, *e, *p, *pp, *ue;

	s = str;
	ue = s + length;

	/* parse scheme */
	if ((e = memchr(s, ':', length)) && e != s) {
		/* validate scheme */
		p = s;
		while (p < e) {
			/* scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] */
			/* ctype functions are undefined for negative values, hence the
			 * unsigned char casts (plain char may be signed). */
			if (!isalpha((unsigned char)*p) && !isdigit((unsigned char)*p) && *p != '+' && *p != '.' && *p != '-') {
				/* Only treat the ':' as a port separator when it comes
				 * before any '?' or '#'. This is done with bounded memchr()
				 * calls instead of strcspn(s, "?#"), which would depend on
				 * a NUL terminator and stop at embedded NUL bytes. */
				if (e + 1 < ue && !memchr(s, '?', (e - s)) && !memchr(s, '#', (e - s))) {
					goto parse_port;
				} else {
					goto just_path;
				}
			}
			p++;
		}

		if (e + 1 == ue) { /* only scheme is available */
			ret->scheme = estrndup(s, (e - s));
			php_replace_controlchars_ex(ret->scheme, (e - s));
			return ret;
		}

		/*
		 * certain schemas like mailto: and zlib: may not have any / after them
		 * this check ensures we support those.
		 */
		if (*(e + 1) != '/') {
			/* check if the data we get is a port this allows us to
			 * correctly parse things like a.com:80
			 */
			p = e + 1;
			while (p < ue && isdigit((unsigned char)*p)) {
				p++;
			}

			if ((p == ue || *p == '/') && (p - e) < 7) {
				goto parse_port;
			}

			ret->scheme = estrndup(s, (e - s));
			php_replace_controlchars_ex(ret->scheme, (e - s));

			s = e + 1;
			goto just_path;
		} else {
			ret->scheme = estrndup(s, (e - s));
			php_replace_controlchars_ex(ret->scheme, (e - s));

			if (e + 2 < ue && *(e + 2) == '/') {
				s = e + 3;
				/* sizeof("file") includes the terminator, so this is an
				 * exact, case-insensitive match on the whole scheme */
				if (!strncasecmp("file", ret->scheme, sizeof("file"))) {
					if (e + 3 < ue && *(e + 3) == '/') {
						/* support windows drive letters as in:
						 * file:///c:/somedir/file.txt
						 */
						if (e + 5 < ue && *(e + 5) == ':') {
							s = e + 4;
						}
						goto just_path;
					}
				}
			} else {
				s = e + 1;
				goto just_path;
			}
		}
	} else if (e) {
		/* no scheme; starts with colon: look for port */
		parse_port:
		p = e + 1;
		pp = p;

		while (pp < ue && pp - p < 6 && isdigit((unsigned char)*pp)) {
			pp++;
		}

		if (pp - p > 0 && pp - p < 6 && (pp == ue || *pp == '/')) {
			zend_long port;

			/* at most 5 digits, so the copy plus terminator fits port_buf */
			memcpy(port_buf, p, (pp - p));
			port_buf[pp - p] = '\0';
			port = ZEND_STRTOL(port_buf, NULL, 10);
			if (port > 0 && port <= 65535) {
				ret->port = (unsigned short) port;
				if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
					s += 2;
				}
			} else {
				if (ret->scheme) efree(ret->scheme);
				efree(ret);
				return NULL;
			}
		} else if (p == pp && pp == ue) {
			/* ':' was the last byte of the input */
			if (ret->scheme) efree(ret->scheme);
			efree(ret);
			return NULL;
		} else if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
			s += 2;
		} else {
			goto just_path;
		}
	} else if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
		s += 2;
	} else {
		goto just_path;
	}

	/* Binary-safe strcspn(s, "/?#"): e ends up on the earliest delimiter,
	 * or on ue when there is none */
	e = ue;
	if ((p = memchr(s, '/', e - s))) {
		e = p;
	}
	if ((p = memchr(s, '?', e - s))) {
		e = p;
	}
	if ((p = memchr(s, '#', e - s))) {
		e = p;
	}

	/* check for login and password */
	if ((p = zend_memrchr(s, '@', (e - s)))) {
		if ((pp = memchr(s, ':', (p - s)))) {
			ret->user = estrndup(s, (pp - s));
			php_replace_controlchars_ex(ret->user, (pp - s));

			pp++;
			ret->pass = estrndup(pp, (p - pp));
			php_replace_controlchars_ex(ret->pass, (p - pp));
		} else {
			ret->user = estrndup(s, (p - s));
			php_replace_controlchars_ex(ret->user, (p - s));
		}

		s = p + 1;
	}

	/* check for port */
	if (s < ue && *s == '[' && *(e - 1) == ']') {
		/* Short circuit portscan,
		 * we're dealing with an
		 * IPv6 embedded address */
		p = NULL;
	} else {
		p = zend_memrchr(s, ':', (e - s));
	}

	if (p) {
		/* a port found earlier (parse_port) wins; then p stays on the ':'
		 * so that the host ends there */
		if (!ret->port) {
			p++;
			if (e - p > 5) { /* port cannot be longer then 5 characters */
				if (ret->scheme) efree(ret->scheme);
				if (ret->user) efree(ret->user);
				if (ret->pass) efree(ret->pass);
				efree(ret);
				return NULL;
			} else if (e - p > 0) {
				zend_long port;

				memcpy(port_buf, p, (e - p));
				port_buf[e - p] = '\0';
				port = ZEND_STRTOL(port_buf, NULL, 10);
				if (port > 0 && port <= 65535) {
					ret->port = (unsigned short)port;
				} else {
					if (ret->scheme) efree(ret->scheme);
					if (ret->user) efree(ret->user);
					if (ret->pass) efree(ret->pass);
					efree(ret);
					return NULL;
				}
			}
			p--;
		}
	} else {
		p = e;
	}

	/* check if we have a valid host, if we don't reject the string as url */
	if ((p - s) < 1) {
		if (ret->scheme) efree(ret->scheme);
		if (ret->user) efree(ret->user);
		if (ret->pass) efree(ret->pass);
		efree(ret);
		return NULL;
	}

	ret->host = estrndup(s, (p - s));
	php_replace_controlchars_ex(ret->host, (p - s));

	if (e == ue) {
		return ret;
	}

	s = e;

	just_path:

	/* Peel components off from the right: fragment first, then query;
	 * whatever is left in [s, e) is the path. */
	e = ue;
	p = memchr(s, '#', (e - s));
	if (p) {
		p++;
		if (p < e) {
			ret->fragment = estrndup(p, (e - p));
			php_replace_controlchars_ex(ret->fragment, (e - p));
		}
		e = p - 1;
	}

	p = memchr(s, '?', (e - s));
	if (p) {
		p++;
		if (p < e) {
			ret->query = estrndup(p, (e - p));
			php_replace_controlchars_ex(ret->query, (e - p));
		}
		e = p - 1;
	}

	/* s == ue yields an empty (zero-length) path string */
	if (s < e || s == ue) {
		ret->path = estrndup(s, (e - s));
		php_replace_controlchars_ex(ret->path, (e - s));
	}

	return ret;
}
/*
 * Convenience wrapper around php_replace_controlchars_ex() for NUL-terminated
 * strings: measures `str` with strlen() and forwards the pointer and length.
 *
 * Returns whatever php_replace_controlchars_ex() returns, or NULL when `str`
 * is NULL (strlen(NULL) is undefined behaviour, so the call is skipped).
 */
PHPAPI char *php_replace_controlchars(char *str)
{
	if (str == NULL) {
		return NULL;
	}

	return php_replace_controlchars_ex(str, strlen(str));
}
/* {{{ php_url_parse_ex
 * Split the first `length` bytes of `str` into the fields of a php_url
 * (scheme, user, pass, host, port, path, query, fragment).
 *
 * Each component that is found is copied with estrndup() and then passed to
 * php_replace_controlchars_ex(); components that are absent keep the zero
 * value they received from ecalloc().
 *
 * NOTE: several look-ahead tests below compare against '\0', so this version
 * expects str[length] to be a NUL terminator.
 *
 * Returns the newly allocated php_url, or NULL (after releasing scheme, user
 * and pass) when the text in port position after the host is longer than five
 * characters or when the host part is empty.
 *
 * NOTE(review): the port is converted with atoi() and stored without a range
 * check, so non-numeric or out-of-range port text is not rejected here.
 */
ZEND_API php_url *php_url_parse_ex(const char *str, int length)
{
	char port_buf[6];
	php_url *ret = (php_url *)ecalloc(1, sizeof(php_url));
	const char *s, *e, *p, *pp, *ue;

	s = str;
	ue = s + length;

	/* parse scheme */
	if ((e = (const char *)memchr(s, ':', length)) && (e - s)) {
		/*
		 * certain schemas like mailto: and zlib: may not have any / after them
		 * this check ensures we support those.
		 */
		if (*(e + 1) != '/') {
			/* check if the data we get is a port this allows us to
			 * correctly parse things like a.com:80
			 */
			p = e + 1;
			/* ctype functions are undefined for negative values, hence the
			 * unsigned char cast (plain char may be signed). */
			while (isdigit((unsigned char)*p)) {
				p++;
			}

			if ((*p) == '\0' || *p == '/') {
				goto parse_port;
			}

			ret->scheme = estrndup(s, (e - s));
			php_replace_controlchars_ex(ret->scheme, (e - s));

			/* skip "scheme:" and treat the remainder as a bare path */
			length -= ++e - s;
			s = e;
			goto just_path;
		} else {
			ret->scheme = estrndup(s, (e - s));
			php_replace_controlchars_ex(ret->scheme, (e - s));

			if (*(e + 2) == '/') {
				s = e + 3;
				/* sizeof("file") includes the terminator, so this is an
				 * exact, case-insensitive match on the whole scheme */
				if (!strncasecmp("file", ret->scheme, sizeof("file"))) {
					if (*(e + 3) == '/') {
						goto nohost;
					}
				}
			} else {
				if (!strncasecmp("file", ret->scheme, sizeof("file"))) {
					s = e + 1;
					goto nohost;
				} else {
					length -= ++e - s;
					s = e;
					goto just_path;
				}
			}
		}
	} else if (e) {
		/* no scheme, look for port */
		parse_port:
		p = e + 1;
		pp = p;

		while (pp - p < 6 && isdigit((unsigned char)*pp)) {
			pp++;
		}

		if (pp - p < 6 && (*pp == '/' || *pp == '\0')) {
			/* at most 5 digits, so the copy plus terminator fits port_buf */
			memcpy(port_buf, p, (pp - p));
			port_buf[pp - p] = '\0';
			ret->port = atoi(port_buf);
		} else {
			goto just_path;
		}
	} else {
		just_path:
		ue = s + length;
		goto nohost;
	}

	/* e = end of the authority part: first '/', else first '?', else first
	 * '#', else end of input */
	e = ue;

	if (!(p = (const char *)memchr(s, '/', (ue - s)))) {
		if ((p = (const char *)memchr(s, '?', (ue - s)))) {
			e = p;
		} else if ((p = (const char *)memchr(s, '#', (ue - s)))) {
			e = p;
		}
	} else {
		e = p;
	}

	/* check for login and password */
	if ((p = (const char *)memchr(s, '@', (e - s)))) {
		if ((pp = (const char *)memchr(s, ':', (p - s)))) {
			if ((pp - s) > 0) {
				ret->user = estrndup(s, (pp - s));
				php_replace_controlchars_ex(ret->user, (pp - s));
			}

			pp++;
			if (p - pp > 0) {
				ret->pass = estrndup(pp, (p - pp));
				php_replace_controlchars_ex(ret->pass, (p - pp));
			}
		} else {
			ret->user = estrndup(s, (p - s));
			php_replace_controlchars_ex(ret->user, (p - s));
		}

		s = p + 1;
	}

	/* check for port */
	if (*s == '[' && *(e - 1) == ']') {
		/* Short circuit portscan
		 * we're dealing with an
		 * IPv6 embedded address */
		p = s;
	} else {
		/* memrchr is a GNU specific extension
		 * Emulate for wide compatibility.
		 * The bounds test must come first: testing *p first would read the
		 * byte in front of s once the scan runs off the start. */
		for (p = e; p >= s && *p != ':'; p--);
	}

	if (p >= s && *p == ':') {
		/* a port found earlier (parse_port) wins; then p stays on the ':'
		 * so that the host ends there */
		if (!ret->port) {
			p++;
			if (e - p > 5) { /* port cannot be longer then 5 characters */
				STR_FREE(ret->scheme);
				STR_FREE(ret->user);
				STR_FREE(ret->pass);
				efree(ret);
				return NULL;
			} else if (e - p > 0) {
				memcpy(port_buf, p, (e - p));
				port_buf[e - p] = '\0';
				ret->port = atoi(port_buf);
			}
			p--;
		}
	} else {
		p = e;
	}

	/* check if we have a valid host, if we don't reject the string as url */
	if ((p - s) < 1) {
		STR_FREE(ret->scheme);
		STR_FREE(ret->user);
		STR_FREE(ret->pass);
		efree(ret);
		return NULL;
	}

	ret->host = estrndup(s, (p - s));
	php_replace_controlchars_ex(ret->host, (p - s));

	if (e == ue) {
		return ret;
	}

	s = e;

	nohost:

	if ((p = (const char *)memchr(s, '?', (ue - s)))) {
		/* bounded search: strchr() would ignore ue and stop at the first
		 * NUL instead, unlike the '?' search above */
		pp = (const char *)memchr(s, '#', (ue - s));

		if (pp && pp < p) {
			/* '#' comes first, so the '?' is part of the fragment and there
			 * is no query */
			if (pp - s) {
				ret->path = estrndup(s, (pp - s));
				php_replace_controlchars_ex(ret->path, (pp - s));
			}
			p = pp;
			goto label_parse;
		}

		if (p - s) {
			ret->path = estrndup(s, (p - s));
			php_replace_controlchars_ex(ret->path, (p - s));
		}

		if (pp) {
			if (pp - ++p) {
				ret->query = estrndup(p, (pp - p));
				php_replace_controlchars_ex(ret->query, (pp - p));
			}
			p = pp;
			goto label_parse;
		} else if (++p - ue) {
			ret->query = estrndup(p, (ue - p));
			php_replace_controlchars_ex(ret->query, (ue - p));
		}
	} else if ((p = (const char *)memchr(s, '#', (ue - s)))) {
		if (p - s) {
			ret->path = estrndup(s, (p - s));
			php_replace_controlchars_ex(ret->path, (p - s));
		}

		label_parse:
		/* p is on the '#'; everything after it is the fragment */
		p++;

		if (ue - p) {
			ret->fragment = estrndup(p, (ue - p));
			php_replace_controlchars_ex(ret->fragment, (ue - p));
		}
	} else {
		ret->path = estrndup(s, (ue - s));
		php_replace_controlchars_ex(ret->path, (ue - s));
	}

	return ret;
}