/
http_url.c
151 lines (141 loc) · 3.88 KB
/
http_url.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#include "http_url.h"
/*
 * Check that the first `len` bytes of `protocol` are all alphanumeric.
 *
 * protocol: buffer to inspect; it does not need to be NUL-terminated
 *           because only `len` bytes are read.
 * len:      number of bytes to inspect.
 *
 * Returns 1 if every inspected byte satisfies isalnum(), 0 otherwise.
 * A `len` of zero or less inspects nothing and therefore returns 1.
 */
int _check_protocol(char *protocol, int len)
{
    int i;

    for (i = 0; i < len; i++) {
        /* <ctype.h> classifiers require a value representable as
         * unsigned char (or EOF); a plain char can be negative for
         * bytes >= 0x80, so convert before the call. */
        if (!isalnum((unsigned char)protocol[i])) {
            return 0;
        }
    }
    return 1;
}
/*
 * Check that the first `len` bytes of `host` contain only characters
 * accepted in a host name here: alphanumerics, '.' and '-'.
 *
 * host: buffer to inspect; only `len` bytes are read.
 * len:  number of bytes to inspect.
 *
 * Returns 1 if every inspected byte is acceptable, 0 otherwise.
 * A `len` of zero or less inspects nothing and therefore returns 1.
 */
int _check_host(char *host, int len)
{
    int i;

    for (i = 0; i < len; i++) {
        /* Convert to unsigned char first: passing a negative plain char
         * to isalnum() is undefined behaviour. */
        unsigned char c = (unsigned char)host[i];

        if (!isalnum(c) && c != '.' && c != '-') {
            return 0;
        }
    }
    return 1;
}
/*
 * Check that the first `len` bytes of `port` are all decimal digits.
 *
 * port: buffer to inspect; only `len` bytes are read.
 * len:  number of bytes to inspect.
 *
 * Returns 1 if every inspected byte satisfies isdigit(), 0 otherwise.
 * A `len` of zero or less inspects nothing and therefore returns 1;
 * the numeric range of the port is not checked.
 */
int _check_port(char *port, int len)
{
    int i;

    for (i = 0; i < len; i++) {
        /* isdigit() must not be handed a negative plain char. */
        if (!isdigit((unsigned char)port[i])) {
            return 0;
        }
    }
    return 1;
}
/*
 * Check that the first `len` bytes of `path` contain only characters
 * accepted in a URL path here: alphanumerics and the punctuation
 * characters  / ; : @ % & = _ . - +
 *
 * path: buffer to inspect; only `len` bytes are read.
 * len:  number of bytes to inspect.
 *
 * Returns 1 if every inspected byte is acceptable, 0 otherwise.
 * A `len` of zero or less inspects nothing and therefore returns 1.
 */
int _check_path(char *path, int len)
{
    int i;

    for (i = 0; i < len; i++) {
        /* Convert to unsigned char first: passing a negative plain char
         * to isalnum() is undefined behaviour. */
        unsigned char c = (unsigned char)path[i];

        if (isalnum(c)) {
            continue;
        }
        switch (c) {
        case '/': case ';': case ':': case '@': case '%':
        case '&': case '=': case '_': case '.': case '-':
        case '+':
            break;          /* allowed punctuation, keep scanning */
        default:
            return 0;
        }
    }
    return 1;
}
/*
 * Copy the `len` bytes starting at `src` into the field `dest` of
 * capacity `cap` and NUL-terminate it.
 *
 * Returns 1 on success, 0 (copying nothing) when the span plus its
 * terminator would not fit.
 */
static int url_copy_span(char *dest, size_t cap, const char *src, size_t len)
{
    if (len >= cap) {
        return 0;
    }
    memcpy(dest, src, len);
    dest[len] = '\0';
    return 1;
}

/*
 * Split `url` into protocol, host, port, path and search (query) with
 * plain string scanning and store the pieces in `*http_url`.
 *
 * Accepted shape:  [protocol "://"] host [":" port] [path] ["?" search]
 *
 * http_url: output structure; it is zeroed before parsing starts.
 * url:      NUL-terminated input string; it is not modified.
 *
 * Returns URL_RECOGNIZED on success, otherwise the code of the first
 * component that failed: URL_PROTO_UNRECOGNIZED, URL_HOST_UNRECOGNIZED,
 * URL_PORT_UNRECOGNIZED or URL_PATH_UNRECOGNIZED. A component fails when
 * it contains a character rejected by the matching _check_*() helper or
 * when it does not fit in its destination field. A search string that is
 * too long is reported as URL_PATH_UNRECOGNIZED. On failure the fields
 * parsed before the failing one stay filled in.
 *
 * Empty components are accepted as-is (the _check_*() helpers return 1
 * for a zero length), and the search string is not validated.
 *
 * NOTE(review): the bounds checks take sizeof on the http_url_t members,
 * which relies on them being char arrays rather than pointers - confirm
 * in http_url.h.
 */
int http_url_parse_s(http_url_t *http_url, char *url)
{
    char *str;
    char *cstr = url;                   /* start of the part still to parse */
    char *estr = url + strlen(url);     /* terminating NUL of url */
    char *aend;                         /* end of the host[:port] section */
    char *pend;                         /* end of the path section */

    memset(http_url, 0, sizeof(http_url_t));

    /* protocol: "://" only introduces a protocol when it precedes every
     * other '/', i.e. the first slash in the URL is the one inside "://".
     * Otherwise it belongs to the path or the search string. */
    str = strstr(url, "://");
    if (str != NULL && strchr(url, '/') == str + 1) {
        if (!_check_protocol(cstr, (int)(str - cstr))
            || !url_copy_span(http_url->protocol, sizeof(http_url->protocol),
                              cstr, (size_t)(str - cstr))) {
            return URL_PROTO_UNRECOGNIZED;
        }
        cstr = str + 3;
    }

    /* host[:port] runs up to the first '/' or to the end of the string. */
    aend = strchr(cstr, '/');
    if (aend == NULL) {
        aend = estr;
    }

    /* Look for the port separator inside that section only, so that a
     * ':' in the path or search string is not mistaken for it. */
    str = memchr(cstr, ':', (size_t)(aend - cstr));
    if (str != NULL) { /* contain port */
        if (!_check_host(cstr, (int)(str - cstr))
            || !url_copy_span(http_url->host, sizeof(http_url->host),
                              cstr, (size_t)(str - cstr))) {
            return URL_HOST_UNRECOGNIZED;
        }
        cstr = str + 1;
        if (!_check_port(cstr, (int)(aend - cstr))
            || !url_copy_span(http_url->port, sizeof(http_url->port),
                              cstr, (size_t)(aend - cstr))) {
            return URL_PORT_UNRECOGNIZED;
        }
    } else { /* no port */
        if (!_check_host(cstr, (int)(aend - cstr))
            || !url_copy_span(http_url->host, sizeof(http_url->host),
                              cstr, (size_t)(aend - cstr))) {
            return URL_HOST_UNRECOGNIZED;
        }
    }
    cstr = aend;

    /* path: everything up to the first '?' (or to the end of the string) */
    str = strchr(cstr, '?');
    pend = (str != NULL) ? str : estr;
    if (!_check_path(cstr, (int)(pend - cstr))
        || !url_copy_span(http_url->path, sizeof(http_url->path),
                          cstr, (size_t)(pend - cstr))) {
        return URL_PATH_UNRECOGNIZED;
    }

    /* search: whatever follows the '?', copied without validation */
    if (str != NULL) {
        cstr = str + 1;
        if (!url_copy_span(http_url->search, sizeof(http_url->search),
                           cstr, (size_t)(estr - cstr))) {
            return URL_PATH_UNRECOGNIZED;
        }
    }

    return URL_RECOGNIZED;
}
/*
 * Copy the text of regex sub-match `m` (offsets into `src`) into the
 * field `dest` of capacity `cap`. `dest` must already be zero-filled,
 * which is what provides the NUL terminator.
 *
 * A group that did not take part in the match, or matched the empty
 * string, leaves `dest` untouched. Returns 1 on success, 0 when the
 * matched text plus its terminator would not fit.
 */
static int url_copy_match(char *dest, size_t cap, const char *src,
                          const regmatch_t *m)
{
    size_t len;

    if (m->rm_so < 0 || m->rm_so >= m->rm_eo) {
        return 1;
    }
    len = (size_t)(m->rm_eo - m->rm_so);
    if (len >= cap) {
        return 0;
    }
    memcpy(dest, src + m->rm_so, len);
    return 1;
}

/*
 * Parse `url` by matching it against the POSIX extended regular
 * expression __HTTPURL and copying selected capture groups into
 * `*http_url`.
 *
 * http_url: output structure; it is zeroed before matching.
 * url:      NUL-terminated input string; it is not modified.
 *
 * Returns URL_RECOGNIZED when the pattern matches and every captured
 * component fits in its field. Returns URL_UNRECOGNIZED, with
 * `*http_url` all zeroes, when the pattern does not match, when
 * regexec() reports an error, when a component is too long, or when the
 * pattern fails to compile (in which case the regerror() text is also
 * written to stderr).
 *
 * NOTE(review): the bounds checks take sizeof on the http_url_t members,
 * which relies on them being char arrays rather than pointers - confirm
 * in http_url.h.
 */
int http_url_parse(http_url_t *http_url, char *url)
{
    /* An enum constant keeps pmatch a fixed-size array, not a VLA. */
    enum { HTTP_URL_NMATCH = 12 };

    int status = URL_UNRECOGNIZED;
    int ecode;
    regex_t reg;
    char ebuf[128];
    regmatch_t pmatch[HTTP_URL_NMATCH];
    const char *pattern = __HTTPURL;
    /*
    __HTTPURL below:
    ^((http[s]?)://)?([a-zA-Z0-9][-a-zA-Z0-9_]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9_]{0,62})+\.?)(:[0-9]+)?((/[a-zA-Z0-9;:@&=_.]*(/[a-zA-Z0-9;:@&=_.]*)*)(\?([-a-zA-Z0-9;:@&=,]*))?)?$

    NOTE(review): counting the groups of the pattern as written in this
    comment puts the port at 5, the path at 7 and the search string at
    10, whereas the code below reads groups 6, 8 and 11. Presumably the
    real __HTTPURL wraps the port digits in a group of their own -
    confirm against its definition before touching the indices.
    */

    memset(http_url, 0, sizeof(http_url_t));

    ecode = regcomp(&reg, pattern, REG_NEWLINE | REG_EXTENDED);
    if (ecode != 0) {
        regerror(ecode, &reg, ebuf, sizeof(ebuf));
        fprintf(stderr, "%s: url '%s' parse failed.\n", ebuf, url);
        /* Nothing was compiled, so there is nothing to regfree(). */
        return status;
    }

    /* Only a return of 0 is a match; any other value (REG_NOMATCH or an
     * execution error) leaves pmatch without usable offsets. */
    if (regexec(&reg, url, HTTP_URL_NMATCH, pmatch, 0) == 0) {
        if (url_copy_match(http_url->protocol, sizeof(http_url->protocol), url, &pmatch[2])
            && url_copy_match(http_url->host, sizeof(http_url->host), url, &pmatch[3])
            && url_copy_match(http_url->port, sizeof(http_url->port), url, &pmatch[6])
            && url_copy_match(http_url->path, sizeof(http_url->path), url, &pmatch[8])
            && url_copy_match(http_url->search, sizeof(http_url->search), url, &pmatch[11])) {
            status = URL_RECOGNIZED;
        } else {
            /* A component did not fit: do not hand back a partial result. */
            memset(http_url, 0, sizeof(http_url_t));
        }
    }

    regfree(&reg);
    return status;
}