// Parses `urlStr` and returns a JsObject describing its components.
//
// Keys written (each only when the corresponding component is present):
//   HREF      - the input string, unchanged
//   PROTOCOL  - lower-cased scheme
//   PORT      - port string
//   HOSTNAME  - lower-cased host
//   HOST      - hostname, followed by ":" + port when a port is present
//   QUERY     - query string without the leading "?"
//   PATHNAME  - "/" + path
//   PATH      - pathname, followed by "?" + query when a query is present
//   AUTH      - "username:password" (only when BOTH are present)
//   HASH      - "#" + fragment
//
// Returns a null pointer when `urlStr` is null. When the underlying
// parse_url() call fails (it returns NULL), the returned object carries
// only HREF.
JsObject::Ptr parse(String::CPtr urlStr) {
    static const String::CPtr colon = String::create(":");
    static const String::CPtr slash = String::create("/");
    static const String::CPtr sharp = String::create("#");
    static const String::CPtr question = String::create("?");

    if (!urlStr) {
        LIBJ_NULL_PTR(JsObject, nullp);
        return nullp;
    }

    struct parsed_url* url = parse_url(urlStr->toStdString().c_str());

    JsObject::Ptr obj = JsObject::create();
    obj->put(HREF, urlStr);

    // parse_url() signals failure with NULL; dereferencing it below would
    // be undefined behaviour, so hand back what we know (the href) instead.
    if (!url) {
        return obj;
    }

    if (url->scheme) {
        obj->put(PROTOCOL, String::create(url->scheme)->toLowerCase());
    }

    // The port is remembered so HOST can be assembled as "hostname:port".
    LIBJ_NULL_CPTR(String, port);
    if (url->port) {
        port = String::create(url->port);
        obj->put(PORT, port);
    }

    if (url->host) {
        String::CPtr hostname = String::create(url->host)->toLowerCase();
        obj->put(HOSTNAME, hostname);
        if (port) {
            obj->put(HOST, hostname->concat(colon)->concat(port));
        } else {
            obj->put(HOST, hostname);
        }
    }

    // The query is remembered so PATH can be assembled as "pathname?query".
    LIBJ_NULL_CPTR(String, query);
    if (url->query) {
        query = String::create(url->query);
        obj->put(QUERY, query);
    }

    if (url->path) {
        // The path is prefixed with "/" here before being stored.
        String::CPtr pathname = slash->concat(String::create(url->path));
        obj->put(PATHNAME, pathname);
        if (query) {
            obj->put(PATH, pathname->concat(question)->concat(query));
        } else {
            obj->put(PATH, pathname);
        }
    }

    if (url->username && url->password) {
        String::CPtr auth = String::create(url->username);
        auth = auth->concat(colon);
        auth = auth->concat(String::create(url->password));
        obj->put(AUTH, auth);
    }

    if (url->fragment) {
        String::CPtr hash = sharp->concat(String::create(url->fragment));
        obj->put(HASH, hash);
    }

    parsed_url_free(url);
    return obj;
}
// A URL without scheme/authority: only path, query and fragment are expected
// to be populated, and the path is reported without its leading '/'.
TEST(GTestUrlParser, TestParseUrl1) {
    struct parsed_url* url = parse_url("/foo/bar?abc=123&pqr=xyz#fr");
    // Guard first: a NULL result must fail the test, not crash the binary
    // on the dereferences below.
    ASSERT_TRUE(url);

    ASSERT_FALSE(url->scheme);
    ASSERT_FALSE(url->host);
    ASSERT_FALSE(url->port);
    ASSERT_EQ(String::create(url->path)
        ->compareTo(String::create("foo/bar")), 0);
    ASSERT_EQ(String::create(url->query)
        ->compareTo(String::create("abc=123&pqr=xyz")), 0);
    ASSERT_EQ(String::create(url->fragment)
        ->compareTo(String::create("fr")), 0);

    parsed_url_free(url);
}
// A fully specified URL: every component (scheme, userinfo, host, port,
// path, query, fragment) is expected to be extracted.
TEST(GTestUrlParser, TestParseUrl2) {
    // The userinfo part must match the username/password assertions below.
    struct parsed_url* url = parse_url(
        "http://user1:passwd2@www.gtest.com:8888/foo/bar?abc=123&pqr=xyz#fr");
    // Guard first: a NULL result must fail the test, not crash the binary
    // on the dereferences below.
    ASSERT_TRUE(url);

    ASSERT_EQ(String::create(url->scheme)
        ->compareTo(String::create("http")), 0);
    ASSERT_EQ(String::create(url->host)
        ->compareTo(String::create("www.gtest.com")), 0);
    ASSERT_EQ(String::create(url->port)
        ->compareTo(String::create("8888")), 0);
    ASSERT_EQ(String::create(url->path)
        ->compareTo(String::create("foo/bar")), 0);
    ASSERT_EQ(String::create(url->query)
        ->compareTo(String::create("abc=123&pqr=xyz")), 0);
    ASSERT_EQ(String::create(url->username)
        ->compareTo(String::create("user1")), 0);
    ASSERT_EQ(String::create(url->password)
        ->compareTo(String::create("passwd2")), 0);
    ASSERT_EQ(String::create(url->fragment)
        ->compareTo(String::create("fr")), 0);

    parsed_url_free(url);
}
/*
 * Copy the `len` bytes starting at `start` into a freshly malloc'ed,
 * NUL-terminated string. Returns NULL when the allocation fails.
 */
static char *
url_dup_range(const char *start, size_t len)
{
    /* The cast is redundant in C but keeps the file valid if built as C++. */
    char *copy = (char *)malloc(len + 1);

    if ( NULL == copy ) {
        return NULL;
    }
    memcpy(copy, start, len);
    copy[len] = '\0';
    return copy;
}

/*
 * Parse a URL of the form
 *
 *   <scheme>://[<user>[:<password>]@]<host>[:<port>][/<path>][?<query>][#<fragment>]
 *
 * See RFC 1738, 3986
 *
 * Returns a newly allocated parsed_url; components that are absent from the
 * input stay NULL. The stored path does not include its leading '/', the
 * query does not include '?', and the fragment does not include '#'.
 * The scheme is stored in lower case. Release the result with
 * parsed_url_free().
 *
 * Returns NULL when:
 *   - url is NULL or an allocation fails,
 *   - there is no ':' or the scheme contains a character rejected by
 *     _is_scheme_char(),
 *   - the scheme is not followed by "//",
 *   - the host is empty,
 *   - the character after host/port is neither '/' nor end of string.
 *
 * NOTE(review): the authority scan only treats '/' as its terminator, so a
 * '?' or '#' directly after the host (no '/') is absorbed into the host, and
 * an '@' there is taken as the end of the userinfo.
 */
struct parsed_url *
parse_url(const char *url)
{
    struct parsed_url *purl;
    const char *tmpstr;
    const char *curstr;
    int len;
    int i;
    int userpass_flag;
    int bracket_flag;

    /* strchr() below must not be handed a NULL pointer. */
    if ( NULL == url ) {
        return NULL;
    }

    /* Allocate the parsed url storage */
    purl = (struct parsed_url *)malloc(sizeof(struct parsed_url));
    if ( NULL == purl ) {
        return NULL;
    }
    purl->scheme = NULL;
    purl->host = NULL;
    purl->port = NULL;
    purl->path = NULL;
    purl->query = NULL;
    purl->fragment = NULL;
    purl->username = NULL;
    purl->password = NULL;

    curstr = url;

    /*
     * <scheme>:<scheme-specific-part>
     * <scheme> := [a-z\+\-\.]+
     *             upper case = lower case for resiliency
     */
    /* Read scheme */
    tmpstr = strchr(curstr, ':');
    if ( NULL == tmpstr ) {
        /* Not found the character */
        parsed_url_free(purl);
        return NULL;
    }
    /* Get the scheme length */
    len = (int)(tmpstr - curstr);
    /* Check restrictions */
    for ( i = 0; i < len; i++ ) {
        if ( !_is_scheme_char(curstr[i]) ) {
            /* Invalid format */
            parsed_url_free(purl);
            return NULL;
        }
    }
    /* Copy the scheme to the storage */
    purl->scheme = url_dup_range(curstr, (size_t)len);
    if ( NULL == purl->scheme ) {
        parsed_url_free(purl);
        return NULL;
    }
    /* Make the character to lower if it is upper case. */
    for ( i = 0; i < len; i++ ) {
        /* tolower() requires a value representable as unsigned char. */
        purl->scheme[i] = (char)tolower((unsigned char)purl->scheme[i]);
    }
    /* Skip ':' */
    tmpstr++;
    curstr = tmpstr;

    /*
     * //<user>:<password>@<host>:<port>/<url-path>
     * Any ":", "@" and "/" must be encoded.
     */
    /* Eat "//" */
    for ( i = 0; i < 2; i++ ) {
        if ( '/' != *curstr ) {
            parsed_url_free(purl);
            return NULL;
        }
        curstr++;
    }

    /* Check if the user (and password) are specified. */
    userpass_flag = 0;
    tmpstr = curstr;
    while ( '\0' != *tmpstr ) {
        if ( '@' == *tmpstr ) {
            /* Username and password are specified */
            userpass_flag = 1;
            break;
        } else if ( '/' == *tmpstr ) {
            /* End of <host>:<port> specification */
            userpass_flag = 0;
            break;
        }
        tmpstr++;
    }

    /* User and password specification */
    tmpstr = curstr;
    if ( userpass_flag ) {
        /* Read username */
        while ( '\0' != *tmpstr && ':' != *tmpstr && '@' != *tmpstr ) {
            tmpstr++;
        }
        len = (int)(tmpstr - curstr);
        purl->username = url_dup_range(curstr, (size_t)len);
        if ( NULL == purl->username ) {
            parsed_url_free(purl);
            return NULL;
        }
        /* Proceed current pointer */
        curstr = tmpstr;
        if ( ':' == *curstr ) {
            /* Skip ':' */
            curstr++;
            /* Read password */
            tmpstr = curstr;
            while ( '\0' != *tmpstr && '@' != *tmpstr ) {
                tmpstr++;
            }
            len = (int)(tmpstr - curstr);
            purl->password = url_dup_range(curstr, (size_t)len);
            if ( NULL == purl->password ) {
                parsed_url_free(purl);
                return NULL;
            }
            curstr = tmpstr;
        }
        /* Skip '@' */
        if ( '@' != *curstr ) {
            parsed_url_free(purl);
            return NULL;
        }
        curstr++;
    }

    /* A leading '[' marks a bracketed (IPv6) host that may contain ':'. */
    if ( '[' == *curstr ) {
        bracket_flag = 1;
    } else {
        bracket_flag = 0;
    }
    /* Proceed on by delimiters with reading host */
    tmpstr = curstr;
    while ( '\0' != *tmpstr ) {
        if ( bracket_flag && ']' == *tmpstr ) {
            /* End of IPv6 address; the closing ']' is kept in the host. */
            tmpstr++;
            break;
        } else if ( !bracket_flag && (':' == *tmpstr || '/' == *tmpstr) ) {
            /* Port number is specified. */
            break;
        }
        tmpstr++;
    }
    len = (int)(tmpstr - curstr);
    /* An empty host is rejected. */
    if ( len <= 0 ) {
        parsed_url_free(purl);
        return NULL;
    }
    purl->host = url_dup_range(curstr, (size_t)len);
    if ( NULL == purl->host ) {
        parsed_url_free(purl);
        return NULL;
    }
    curstr = tmpstr;

    /* Is port number specified? */
    if ( ':' == *curstr ) {
        curstr++;
        /* Read port number */
        tmpstr = curstr;
        while ( '\0' != *tmpstr && '/' != *tmpstr ) {
            tmpstr++;
        }
        len = (int)(tmpstr - curstr);
        purl->port = url_dup_range(curstr, (size_t)len);
        if ( NULL == purl->port ) {
            parsed_url_free(purl);
            return NULL;
        }
        curstr = tmpstr;
    }

    /* End of the string */
    if ( '\0' == *curstr ) {
        return purl;
    }

    /* Skip '/' */
    if ( '/' != *curstr ) {
        parsed_url_free(purl);
        return NULL;
    }
    curstr++;

    /* Parse path */
    tmpstr = curstr;
    while ( '\0' != *tmpstr && '#' != *tmpstr && '?' != *tmpstr ) {
        tmpstr++;
    }
    len = (int)(tmpstr - curstr);
    purl->path = url_dup_range(curstr, (size_t)len);
    if ( NULL == purl->path ) {
        parsed_url_free(purl);
        return NULL;
    }
    curstr = tmpstr;

    /* Is query specified? */
    if ( '?' == *curstr ) {
        /* Skip '?' */
        curstr++;
        /* Read query */
        tmpstr = curstr;
        while ( '\0' != *tmpstr && '#' != *tmpstr ) {
            tmpstr++;
        }
        len = (int)(tmpstr - curstr);
        purl->query = url_dup_range(curstr, (size_t)len);
        if ( NULL == purl->query ) {
            parsed_url_free(purl);
            return NULL;
        }
        curstr = tmpstr;
    }

    /* Is fragment specified? */
    if ( '#' == *curstr ) {
        /* Skip '#' */
        curstr++;
        /* Read fragment: everything up to the end of the string */
        tmpstr = curstr;
        while ( '\0' != *tmpstr ) {
            tmpstr++;
        }
        len = (int)(tmpstr - curstr);
        purl->fragment = url_dup_range(curstr, (size_t)len);
        if ( NULL == purl->fragment ) {
            parsed_url_free(purl);
            return NULL;
        }
        curstr = tmpstr;
    }

    return purl;
}
/*
 * Function:    _wilddog_url_parseUrl
 * Description: Parse a url with parse_url() and copy its host, path and
 *              query into a newly allocated Wilddog_Url_T.
 *              The stored path always starts with '/'; a missing path becomes
 *              "/", and a single trailing '/' is removed from longer paths.
 *              The query is copied only when the url has one.
 * Input:       url: Input url.
 * Output:      N/A
 * Return:      Pointer to the url structure, or NULL when parsing fails, the
 *              url has no host, the path is rejected by
 *              _wilddogurl_checkPath(), or an allocation fails.
 *              NOTE(review): the result is presumably released with
 *              _wilddog_url_freeParsedUrl(), as the error paths here do.
*/
Wilddog_Url_T * WD_SYSTEM _wilddog_url_parseUrl(Wilddog_Str_T * url)
{
    struct parsed_url * p_paresd_url = NULL;
    Wilddog_Url_T * p_wd_url = NULL;
    size_t copy_len = 0;
    int len = 2;

    wilddog_assert(url, NULL);

    p_paresd_url = parse_url((char*)url);
    if(NULL == p_paresd_url)
        return NULL;

    p_wd_url = (Wilddog_Url_T *)wmalloc(sizeof(Wilddog_Url_T));
    if(NULL == p_wd_url)
    {
        wilddog_debug_level(WD_DEBUG_ERROR, "cannot malloc p_wd_url!\n");
        parsed_url_free(p_paresd_url);
        return NULL;
    }
    /*
     * Start from an all-zero structure so that every error path below can
     * hand a partially filled object to _wilddog_url_freeParsedUrl() without
     * depending on wmalloc() clearing the memory.
     */
    memset(p_wd_url, 0, sizeof(Wilddog_Url_T));

    /* A url without host is not usable: release everything and give up. */
    if(NULL == p_paresd_url->host)
    {
        _wilddog_url_freeParsedUrl(p_wd_url);
        parsed_url_free(p_paresd_url);
        return NULL;
    }

    copy_len = strlen((const char*)p_paresd_url->host);
    p_wd_url->p_url_host = (Wilddog_Str_T *)wmalloc(copy_len + 1);
    if(NULL == p_wd_url->p_url_host)
    {
        wilddog_debug_level(WD_DEBUG_ERROR, "cannot malloc p_url_host!\n");
        _wilddog_url_freeParsedUrl(p_wd_url);
        parsed_url_free(p_paresd_url);
        return NULL;
    }
    /* Copy the terminating NUL as well (copy_len + 1 bytes). */
    memcpy(p_wd_url->p_url_host, p_paresd_url->host, copy_len + 1);

    if(NULL == p_paresd_url->path)
    {
        /* No path in the url: store the root path "/". */
        p_wd_url->p_url_path = (Wilddog_Str_T *)wmalloc(len);
        if(NULL == p_wd_url->p_url_path)
        {
            _wilddog_url_freeParsedUrl(p_wd_url);
            parsed_url_free(p_paresd_url);
            return NULL;
        }
        p_wd_url->p_url_path[0] = '/';
        p_wd_url->p_url_path[1] = 0;
    }
    else
    {
        /* len = strlen(path) + leading '/' + terminating NUL */
        len += (int)strlen((const char*)p_paresd_url->path);

        if(WILDDOG_ERR_NOERR != \
           _wilddogurl_checkPath((Wilddog_Str_T*)p_paresd_url->path)
           )
        {
            _wilddog_url_freeParsedUrl(p_wd_url);
            parsed_url_free(p_paresd_url);
            return NULL;
        }

        p_wd_url->p_url_path = (Wilddog_Str_T *)wmalloc(len + 1);
        if(NULL == p_wd_url->p_url_path)
        {
            _wilddog_url_freeParsedUrl(p_wd_url);
            parsed_url_free(p_paresd_url);
            return NULL;
        }

        /* parse_url() stores the path without its leading '/'; restore it. */
        snprintf((char*)p_wd_url->p_url_path, len, "/%s", \
                 p_paresd_url->path);

        /* Drop one trailing '/', but never reduce "/" to an empty string. */
        len = (int)strlen((const char*)p_wd_url->p_url_path);
        if(len > 1 && p_wd_url->p_url_path[len - 1] == '/')
        {
            p_wd_url->p_url_path[len - 1] = 0;
        }
    }

    if(NULL != p_paresd_url->query)
    {
        copy_len = strlen((const char*)p_paresd_url->query);
        p_wd_url->p_url_query = (Wilddog_Str_T *)wmalloc(copy_len + 1);
        if(NULL == p_wd_url->p_url_query)
        {
            _wilddog_url_freeParsedUrl(p_wd_url);
            parsed_url_free(p_paresd_url);
            return NULL;
        }
        /* Copy the terminating NUL as well (copy_len + 1 bytes). */
        memcpy(p_wd_url->p_url_query, p_paresd_url->query, copy_len + 1);
    }

    parsed_url_free(p_paresd_url);

    return p_wd_url;
}