/* Documented in save_complete.h */
void save_complete_init(void)
{
	/*
	 * Extended regular expression matching a CSS @import rule
	 * (see CSS 2.1, appendix G.1).  The string and URI productions are
	 * approximations of the real grammar.
	 *
	 * Capture groups:
	 *    1      the whole import target, whichever form it takes
	 *    2,  3  "double-quoted" string
	 *    4,  5  'single-quoted' string
	 *    6,  7  url("double-quoted")
	 *    8,  9  url('single-quoted')
	 *   10      url(unquoted)
	 */
	static const char import_pattern[] =
		/* IMPORT_SYM S* */
		"@import[ \t\r\n\f]*"
		/* [ */
		"("
		/* STRING, double-quoted, then single-quoted */
		"\"(([^\"]|[\\]\")*)\"|"
		"'(([^']|[\\]')*)'|"
		/* URI wrapping a double-quoted string */
		"url\\([ \t\r\n\f]*\"(([^\"]|[\\]\")*)\"[ \t\r\n\f]*\\)|"
		/* URI wrapping a single-quoted string */
		"url\\([ \t\r\n\f]*'(([^']|[\\]')*)'[ \t\r\n\f]*\\)|"
		/* URI with an unquoted target */
		"url\\([ \t\r\n\f]*([^) \t\r\n\f]*)[ \t\r\n\f]*\\)"
		/* ] */
		")";

	/* Case-insensitive, so "@IMPORT" and "URL(" are matched too. */
	const int import_flags = REG_EXTENDED | REG_ICASE;

	regcomp_wrapper(&save_complete_import_re, import_pattern,
			import_flags);
}
/*
 * Indices of the capture groups in url_re that hold each URL component.
 * The odd-numbered groups around them also capture the component's
 * delimiter (":", "//", "?" or "#").
 */
#define URL_RE_SCHEME 2
#define URL_RE_AUTHORITY 4
#define URL_RE_PATH 5
#define URL_RE_QUERY 7
#define URL_RE_FRAGMENT 9

/**
 * Compile the regular expressions used for URL handling: url_re, which
 * splits a URL into its components, and url_up_re, which matches a path
 * segment followed by a ".." segment.
 */
void url_init(void)
{
	/* regex from RFC 2396, with leading/trailing whitespace tolerated */
	static const char url_pattern[] =
		"^[[:space:]]*"
		"(([a-zA-Z][-a-zA-Z0-9+.]*):)?"	/* 1, 2: scheme */
		"(//([^/?#[:space:]]*))?"	/* 3, 4: authority */
		"([^?#[:space:]]*)"		/* 5:    path */
		"(\\?([^#[:space:]]*))?"	/* 6, 7: query */
		"(#([^[:space:]]*))?"		/* 8, 9: fragment */
		"[[:space:]]*$";

	/*
	 * "/segment/.." followed by "/" or the end of the string, where
	 * segment is anything other than ".." itself (an empty segment is
	 * accepted).  The alternatives enumerate segment by length.
	 */
	static const char url_up_pattern[] =
		"/("
		"[^/]?|"		/* zero or one character */
		"[.][^./]|"		/* two characters, only the first a dot */
		"[^./][.]|"		/* two characters, only the second a dot */
		"[^./][^./]|"		/* two characters, neither a dot */
		"[^/][^/][^/]+"		/* three or more characters */
		")"
		"/[.][.](/|$)";

	regcomp_wrapper(&url_re, url_pattern, REG_EXTENDED);
	regcomp_wrapper(&url_up_re, url_up_pattern, REG_EXTENDED);
}
/** * Initialise the curl handle and compile regular expressions. */ void init(void) { struct curl_slist *request_headers = 0; if (curl_global_init(CURL_GLOBAL_ALL)) die("Failed to initialise libcurl"); curl = curl_easy_init(); if (!curl) die("Failed to create curl handle"); if (curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback)) die("Failed to set curl options"); if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, data_callback)) die("Failed to set curl options"); if (curl_easy_setopt(curl, CURLOPT_USERAGENT, "httplint")) die("Failed to set curl options"); if (curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer)) die("Failed to set curl options"); /* remove libcurl default headers */ request_headers = curl_slist_append(request_headers, "Accept:"); request_headers = curl_slist_append(request_headers, "Pragma:"); if (curl_easy_setopt(curl, CURLOPT_HTTPHEADER, request_headers)) die("Failed to set curl options"); /* compile regular expressions */ regcomp_wrapper(&re_status_line, "^HTTP/([0-9]+)[.]([0-9]+) ([0-9][0-9][0-9]) ([\t -~€-ÿ]*)$", REG_EXTENDED); regcomp_wrapper(&re_token, "^([-0-9a-zA-Z_.!]+)", REG_EXTENDED); regcomp_wrapper(&re_token_value, "^([-0-9a-zA-Z_.!]+)(=([-0-9a-zA-Z_.!]+|\"([^\"]|[\\].)*\"))?", REG_EXTENDED); regcomp_wrapper(&re_content_type, "^([-0-9a-zA-Z_.]+)/([-0-9a-zA-Z_.]+)[ \t]*" "(;[ \t]*([-0-9a-zA-Z_.]+)=" "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$", REG_EXTENDED); regcomp_wrapper(&re_absolute_uri, "^[a-zA-Z0-9]+://[^ ]+$", REG_EXTENDED); regcomp_wrapper(&re_etag, "^(W/[ \t]*)?\"([^\"]|[\\].)*\"$", REG_EXTENDED); regcomp_wrapper(&re_server, "^((([-0-9a-zA-Z_.!]+(/[-0-9a-zA-Z_.]+)?)|(\\(.*\\)))[ \t]*)+$", REG_EXTENDED); regcomp_wrapper(&re_transfer_coding, "^([-0-9a-zA-Z_.]+)[ \t]*" "(;[ \t]*([-0-9a-zA-Z_.]+)=" "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$", REG_EXTENDED); regcomp_wrapper(&re_upgrade, "^([-0-9a-zA-Z_.](/[-0-9a-zA-Z_.])?)+$", REG_EXTENDED); regcomp_wrapper(&re_ugly, 
"^[a-zA-Z0-9]+://[^/]+[-/a-zA-Z0-9_]*$", REG_EXTENDED); regcomp_wrapper(&re_rfc1123, "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9]) " "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{4}) " "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$", REG_EXTENDED); regcomp_wrapper(&re_rfc1036, "^(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday), " "([0123][0-9])-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-" "([0-9][0-9]) ([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$", REG_EXTENDED); regcomp_wrapper(&re_asctime, "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun) " "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([ 12][0-9]) " "([012][0-9]):([0-5][0-9]):([0-5][0-9]) ([0-9]{4})$", REG_EXTENDED); regcomp_wrapper(&re_cookie_nameval, "^[^;, ]+=[^;, ]*$", REG_EXTENDED); regcomp_wrapper(&re_cookie_expires, "^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), ([0123][0-9])-" "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-([0-9]{4}) " "([012][0-9]):([0-5][0-9]):([0-5][0-9]) GMT$", REG_EXTENDED); }