/*
 * camel_url_file_end:
 * @in: start of the buffer being scanned
 * @pos: start of the matched pattern inside @in; scanning resumes
 *       strlen (match->pattern) bytes after it
 * @inend: one past the last byte that may be scanned
 * @match: match record; match->pattern and match->um_so are read,
 *         match->um_eo is written on success
 *
 * Finds the end of a file-style URL: skips the pattern, optionally one
 * extra '/', then consumes url-safe characters until @inend or until the
 * character reported by url_stop_at_brace() is met.
 *
 * Returns: TRUE with match->um_eo set to the end offset (relative to @in),
 * FALSE if the scan did not advance past @pos.
 */
gboolean
camel_url_file_end (const gchar *in,
                    const gchar *pos,
                    const gchar *inend,
                    urlmatch_t *match)
{
	register const gchar *inptr = pos;
	gchar close_brace;

	inptr += strlen (match->pattern);

	/* Only peek at the byte after the pattern when it is still inside
	 * the scanned range; otherwise inptr could step past inend and the
	 * reported end offset would exceed the input length. */
	if (inptr < inend && *inptr == '/')
		inptr++;

	/* presumably the closing bracket matching one that precedes the URL,
	 * or a value that never matches - confirm against url_stop_at_brace() */
	close_brace = url_stop_at_brace (in, match->um_so, NULL);

	while (inptr < inend && is_urlsafe (*inptr) && *inptr != close_brace)
		inptr++;

	/* NOTE(review): this can only trigger for an empty pattern, since
	 * inptr was already advanced by strlen (match->pattern) above. */
	if (inptr == pos)
		return FALSE;

	match->um_eo = (inptr - in);

	return TRUE;
}
/*
 * camel_url_web_end:
 * @in: start of the buffer being scanned
 * @pos: start of the matched pattern inside @in
 * @inend: one past the last byte that may be scanned
 * @match: match record; match->pattern and match->um_so are read,
 *         match->um_eo is written on success
 *
 * Finds the end of a web-style URL that starts at @pos. After skipping
 * the pattern it walks an optional "user@" part, a dotted domain, an
 * optional ":port" or ":password@domain" part, and an optional path or
 * query, then trims trailing punctuation.
 *
 * Returns: TRUE with match->um_eo set to the end offset (relative to @in),
 * or FALSE when no domain could be recognised or the password form is
 * malformed.
 *
 * NOTE(review): several reads of *inptr below are not guarded by
 * inptr < inend; they appear to rely on the buffer being readable
 * (e.g. NUL-terminated) at inend - confirm against the callers.
 */
gboolean
camel_url_web_end (const gchar *in,
                   const gchar *pos,
                   const gchar *inend,
                   urlmatch_t *match)
{
	register const gchar *inptr = pos;
	gboolean passwd = FALSE;
	const gchar *save;
	gchar close_brace, open_brace;
	gint brace_stack = 0;
	gint port;

	/* skip over the pattern that triggered this match */
	inptr += strlen (match->pattern);

	/* presumably the bracket pair enclosing the URL, if any -
	 * confirm against url_stop_at_brace() */
	close_brace = url_stop_at_brace (in, match->um_so, &open_brace);

	/* find the end of the domain */
	if (is_atom (*inptr)) {
		/* might be a domain or user@domain: scan dot-separated atoms
		 * and see whether they are followed by '@' */
		save = inptr;
		while (inptr < inend) {
			if (!is_atom (*inptr))
				break;

			inptr++;

			while (inptr < inend && is_atom (*inptr))
				inptr++;

			/* only step over a '.' when another atom (or a '/') follows */
			if ((inptr + 1) < inend && *inptr == '.' && (is_atom (inptr[1]) || inptr[1] == '/'))
				inptr++;
		}

		/* no '@': what was scanned is not a user part, so rewind and
		 * re-scan it as a domain; otherwise continue after the '@' */
		if (*inptr != '@')
			inptr = save;
		else
			inptr++;

		goto domain;
	} else if (is_domain (*inptr)) {
	domain:
		/* consume dot-separated domain labels; also re-entered from the
		 * password handling below once "password@" has been consumed */
		while (inptr < inend) {
			if (!is_domain (*inptr))
				break;

			inptr++;

			while (inptr < inend && is_domain (*inptr))
				inptr++;

			/* only step over a '.' when another label (or a '/') follows */
			if ((inptr + 1) < inend && *inptr == '.' && (is_domain (inptr[1]) || inptr[1] == '/'))
				inptr++;
		}
	} else {
		return FALSE;
	}

	if (inptr < inend) {
		switch (*inptr) {
		case ':': /* we either have a port or a password */
			inptr++;

			/* NOTE(review): when passwd is already TRUE this branch is
			 * taken even if *inptr is not a digit */
			if (is_digit (*inptr) || passwd) {
				port = (*inptr++ - '0');

				/* accumulate the decimal value; stop once it exceeds
				 * the valid port range */
				while (inptr < inend && is_digit (*inptr) && port < 65536)
					port = (port * 10) + (*inptr++ - '0');

				/* an out-of-range number, or digits followed by '@',
				 * is not treated as a port */
				if (!passwd && (port >= 65536 || *inptr == '@')) {
					if (inptr < inend) {
						/* this must be a password? */
						goto passwd;
					}

					/* at the end of input: back up one character */
					inptr--;
				}
			} else {
			passwd:
				passwd = TRUE;
				save = inptr;

				while (inptr < inend && is_atom (*inptr))
					inptr++;

				/* with more than two bytes left, the atoms must be
				 * followed by '@' and a domain character; anything
				 * else rejects the match */
				if ((inptr + 2) < inend) {
					if (*inptr == '@') {
						inptr++;
						if (is_domain (*inptr))
							goto domain;
					}

					return FALSE;
				}
			}

			if (inptr >= inend || *inptr != '/')
				break;

			/* we have a '/' so there could be a path - fall through */
		case '/': /* we've detected a path component to our url */
			inptr++;
			/* fall through */
		case '?':
			/* consume url-safe characters; brace_stack counts
			 * open_brace occurrences so that only an unmatched
			 * close_brace (stack drops to -1) ends the URL */
			while (inptr < inend && is_urlsafe (*inptr)) {
				if (*inptr == open_brace) {
					brace_stack++;
				} else if (*inptr == close_brace) {
					brace_stack--;
					if (brace_stack == -1)
						break;
				}
				inptr++;
			}

			break;
		default:
			break;
		}
	}

	/* urls are extremely unlikely to end with any
	 * punctuation, so strip any trailing
	 * punctuation off. Also strip off any closing
	 * brackets and double-quotes. */
	while (inptr > pos && strchr (",.:;?!-|}])\"", inptr[-1]))
		inptr--;

	match->um_eo = (inptr - in);

	return TRUE;
}
/*
 * url_web_end:
 * @in: start of the buffer being scanned
 * @pos: start of the matched pattern inside @in
 * @inend: one past the last byte that may be scanned
 * @match: match record; match->pattern and match->um_so are read,
 *         match->um_eo is written on success
 *
 * Finds the end of a web-style URL that starts at @pos. The host may be
 * a domain name (optionally preceded by "user@"), a dotted IPv4 address,
 * or a bracketed IPv4/IPv6 literal. An optional ":port" or
 * ":password@domain" part and an optional path follow, and trailing
 * punctuation is trimmed from the result.
 *
 * Returns: TRUE with match->um_eo set to the end offset (relative to @in),
 * FALSE when the host part cannot be parsed.
 *
 * NOTE(review): strtol() and several reads of *inptr are not bounded by
 * @inend; they appear to rely on the buffer being NUL-terminated at or
 * after @inend - confirm against the callers.
 * NOTE(review): a host that starts with a digit but is not an IPv4
 * literal is rejected rather than re-scanned as a domain name.
 */
gboolean
url_web_end (const char *in, const char *pos, const char *inend, urlmatch_t *match)
{
	register const char *inptr = pos;
	/* trailing-punctuation stripping never backs up past this point */
	const char *strip_floor = pos;
	gboolean openbracket = FALSE;
	gboolean passwd = FALSE;
	const char *save;
	char close_brace;
	/* kept as long so an oversized number cannot wrap into 0..255 */
	long val;
	int port, n;
	char *end;

	/* skip over the pattern that triggered this match */
	inptr += strlen (match->pattern);

	/* presumably the closing bracket matching one that precedes the URL,
	 * or a value that never matches - confirm against url_stop_at_brace() */
	close_brace = url_stop_at_brace (in, match->um_so);

	/* find the end of the domain */
	if (is_digit (*inptr)) {
		/* leading digit: parse as an unbracketed IPv4 address */
		goto ip_literal2;
	} else if (is_atom (*inptr)) {
		/* might be a domain or user@domain */
		save = inptr;
		while (inptr < inend) {
			if (!is_atom (*inptr))
				break;

			inptr++;

			while (inptr < inend && is_atom (*inptr))
				inptr++;

			/* only step over a '.' when another atom follows */
			if ((inptr + 1) < inend && *inptr == '.' && is_atom (inptr[1]))
				inptr++;
		}

		/* no '@': rewind and re-scan as a host; otherwise continue
		 * with whatever follows the '@' */
		if (*inptr != '@')
			inptr = save;
		else
			inptr++;

		if (*inptr == '[') {
			/* IPv6 (or possibly IPv4) address literal */
			goto ip_literal;
		}

		if (is_domain (*inptr)) {
			/* domain name or IPv4 address */
			goto domain;
		}
	} else if (*inptr == '[') {
	ip_literal:
		openbracket = TRUE;
		inptr++;

		if (is_digit (*inptr)) {
		ip_literal2:
			/* could be IPv4 or IPv6 */
			if ((val = strtol (inptr, &end, 10)) < 0)
				return FALSE;
		} else if ((*inptr >= 'A' && *inptr <= 'F') || (*inptr >= 'a' && *inptr <= 'f')) {
			/* IPv6 address literals are in hex */
			if ((val = strtol (inptr, &end, 16)) < 0 || *end != ':')
				return FALSE;
		} else if (*inptr == ':') {
			/* IPv6 can start with a ':' */
			end = (char *) inptr;
			val = 256; /* invalid value */
		} else {
			return FALSE;
		}

		switch (*end) {
		case '.': /* IPv4 address literal */
			/* one octet has been parsed; read the remaining three */
			n = 1;
			do {
				if (val > 255 || *end != '.')
					return FALSE;

				inptr = end + 1;
				if ((val = strtol (inptr, &end, 10)) < 0)
					return FALSE;

				n++;
			} while (n < 4);

			if (val > 255 || (openbracket && *end != ']'))
				return FALSE;

			if (openbracket) {
				/* step over the closing ']' and make sure the
				 * trailing-punctuation pass cannot strip it */
				inptr = end + 1;
				strip_floor = inptr;
			} else {
				/* leave inptr on the delimiter so that a following
				 * ':' or '/' is still seen by the port/path switch
				 * and nothing past the address is swallowed */
				inptr = end;
			}
			break;
		case ':': /* IPv6 address literal */
			if (!openbracket)
				return FALSE;

			do {
				if (end[1] != ':') {
					/* parse the next hex group */
					inptr = end + 1;
					if ((val = strtol (inptr, &end, 16)) < 0)
						return FALSE;
				} else {
					/* "::" - step onto the second colon */
					inptr = end;
					end++;
				}
			} while (end > inptr && *end == ':');

			if (*end != ']')
				return FALSE;

			/* step over the closing ']' and protect it from the
			 * trailing-punctuation pass */
			inptr = end + 1;
			strip_floor = inptr;
			break;
		default:
			return FALSE;
		}
	} else if (is_domain (*inptr)) {
	domain:
		/* consume dot-separated domain labels; also re-entered from the
		 * password handling below once "password@" has been consumed */
		while (inptr < inend) {
			if (!is_domain (*inptr))
				break;

			inptr++;

			while (inptr < inend && is_domain (*inptr))
				inptr++;

			/* only step over a '.' when another label (or a '/') follows */
			if ((inptr + 1) < inend && *inptr == '.' && (is_domain (inptr[1]) || inptr[1] == '/'))
				inptr++;
		}
	} else {
		return FALSE;
	}

	if (inptr < inend) {
		switch (*inptr) {
		case ':': /* we either have a port or a password */
			inptr++;

			if (is_digit (*inptr) || passwd) {
				port = (*inptr++ - '0');

				/* accumulate the decimal value; stop once it exceeds
				 * the valid port range */
				while (inptr < inend && is_digit (*inptr) && port < 65536)
					port = (port * 10) + (*inptr++ - '0');

				/* an out-of-range number, or digits followed by '@',
				 * is not treated as a port */
				if (!passwd && (port >= 65536 || *inptr == '@')) {
					if (inptr < inend) {
						/* this must be a password? */
						goto passwd;
					}

					inptr--;
				}
			} else {
			passwd:
				passwd = TRUE;

				while (inptr < inend && is_atom (*inptr))
					inptr++;

				/* with more than two bytes left, the atoms must be
				 * followed by '@' and a domain character; anything
				 * else rejects the match */
				if ((inptr + 2) < inend) {
					if (*inptr == '@') {
						inptr++;
						if (is_domain (*inptr))
							goto domain;
					}

					return FALSE;
				}
			}

			if (inptr >= inend || *inptr != '/')
				break;

			/* we have a '/' so there could be a path - fall through */
		case '/': /* we've detected a path component to our url */
			inptr++;

			while (inptr < inend && is_urlsafe (*inptr) && *inptr != close_brace)
				inptr++;

			break;
		default:
			break;
		}
	}

	/* urls are extremely unlikely to end with any
	 * punctuation, so strip any trailing
	 * punctuation off. Also strip off any closing
	 * braces or quotes. */
	while (inptr > strip_floor && strchr (",.:;?!-|)}]'\"", inptr[-1]))
		inptr--;

	match->um_eo = (inptr - in);

	return TRUE;
}