/*
 * COMMAND "getUser": Extract the user identity from the URL
 * SIGNATURE: getUser(str) : str;
 *
 * The user is the text that follows a leading "/~" in the path, up to
 * (not including) the next '/' or the end of the path.  When the path
 * is empty or does not start with "/~", a copy of str_nil is returned.
 *
 * On success *retval holds a newly allocated string.  An ILLARG
 * exception is thrown for a missing or unparsable URL, a MAL exception
 * when the allocation fails.
 */
str
URLgetUser(str *retval, url *val)
{
	const char *after_scheme;
	const char *path_start;
	const char *path_end;

	if (val == NULL || *val == NULL)
		throw(ILLARG, "url.getUser", "url missing");

	after_scheme = skip_scheme(*val);
	if (after_scheme == NULL)
		throw(ILLARG, "url.getUser", "bad url");
	path_start = skip_authority(after_scheme, NULL, NULL, NULL, NULL);
	if (path_start == NULL)
		throw(ILLARG, "url.getUser", "bad url");
	path_end = skip_path(path_start, NULL, NULL);
	if (path_end == NULL)
		throw(ILLARG, "url.getUser", "bad url");

	if (path_start != path_end && path_start[0] == '/' && path_start[1] == '~') {
		const char *name = path_start + 2;	/* first char after "/~" */
		const char *stop = name;
		size_t len;

		/* the name ends at the next '/' or at the end of the path */
		while (stop < path_end && *stop != '/')
			stop++;
		len = stop - name;

		*retval = GDKmalloc(len + 1);
		if (*retval != NULL) {
			strncpy(*retval, name, len);
			(*retval)[len] = 0;
		}
	} else {
		*retval = GDKstrdup(str_nil);
	}

	if (*retval == NULL)
		throw(MAL, "url.getUser", "Allocation failed");
	return MAL_SUCCEED;
}
/*
 * COMMAND "getQuery": Extract the query part from the URL
 * SIGNATURE: getQuery(str) : str;
 *
 * The query is the text between the '?' that directly follows the path
 * and the position returned by skip_search().  If the path is not
 * followed by '?', a copy of str_nil is returned.
 *
 * On success *retval holds a newly allocated string.  An ILLARG
 * exception is thrown for a missing or unparsable URL, a MAL exception
 * when the allocation fails.
 */
str
URLgetQuery(str *retval, url *val)
{
	const char *cursor;
	const char *query;
	const char *query_end;

	if (val == NULL || *val == NULL)
		throw(ILLARG, "url.getQuery", "url missing");

	/* walk over scheme, authority and path; stop at the first failure */
	cursor = skip_scheme(*val);
	if (cursor != NULL)
		cursor = skip_authority(cursor, NULL, NULL, NULL, NULL);
	query = cursor != NULL ? skip_path(cursor, NULL, NULL) : NULL;
	query_end = query != NULL ? skip_search(query) : NULL;
	if (query_end == NULL)
		throw(ILLARG, "url.getQuery", "bad url");

	if (*query != '?') {
		*retval = GDKstrdup(str_nil);
	} else {
		size_t len;

		query++;	/* step over the '?' itself */
		len = query_end - query;
		*retval = GDKmalloc(len + 1);
		if (*retval != NULL) {
			strncpy(*retval, query, len);
			(*retval)[len] = 0;
		}
	}

	if (*retval == NULL)
		throw(MAL, "url.getQuery", "Allocation failed");
	return MAL_SUCCEED;
}
/*
 * COMMAND "getDomain": Extract the Internet domain from the URL
 * SIGNATURE: getDomain(str) : str;
 *
 * Returns the last dot-separated label of the host, i.e. the text after
 * the final '.' in the host part (the whole host when it has no '.').
 * If skip_authority() reports no host, a copy of str_nil is returned.
 *
 * On success *retval holds a newly allocated string.  An ILLARG
 * exception is thrown for a missing or unparsable URL, a MAL exception
 * when the allocation fails.
 */
str
URLgetDomain(str *retval, url *val)
{
	const char *rest;
	const char *host = NULL;
	const char *port = NULL;

	if (val == NULL || *val == NULL)
		throw(ILLARG, "url.getDomain", "url missing");

	rest = skip_scheme(*val);
	if (rest != NULL)
		rest = skip_authority(rest, NULL, NULL, &host, &port);
	if (rest == NULL)
		throw(ILLARG, "url.getDomain", "bad url");

	if (host != NULL) {
		/*
		 * Start at the end of the host: one before the port text when a
		 * port was reported (presumably stepping back over the ':'),
		 * otherwise the end of the authority.
		 */
		const char *label = (port != NULL) ? port - 1 : rest;
		size_t len = 0;

		/* walk backwards to just after the last '.' (or to the host start) */
		for (; label > host && label[-1] != '.'; label--)
			len++;

		*retval = GDKmalloc(len + 1);
		if (*retval != NULL) {
			strncpy(*retval, label, len);
			(*retval)[len] = 0;
		}
	} else {
		*retval = GDKstrdup(str_nil);
	}

	if (*retval == NULL)
		throw(MAL, "url.getDomain", "Allocation failed");
	return MAL_SUCCEED;
}
/*
 * COMMAND "getPort": Extract the port id from the URL
 * SIGNATURE: getPort(str) : str;
 *
 * Returns the text from the port position reported by skip_authority()
 * up to the end of the authority.  If no port is reported, a copy of
 * str_nil is returned.
 *
 * On success *retval holds a newly allocated string.  An ILLARG
 * exception is thrown for a missing or unparsable URL, a MAL exception
 * when the allocation fails.
 */
str
URLgetPort(str *retval, url *val)
{
	const char *authority_end;
	const char *port = NULL;

	if (val == NULL || *val == NULL)
		throw(ILLARG, "url.getPort", "url missing");

	authority_end = skip_scheme(*val);
	if (authority_end == NULL)
		throw(ILLARG, "url.getPort", "bad url");
	authority_end = skip_authority(authority_end, NULL, NULL, NULL, &port);
	if (authority_end == NULL)
		throw(ILLARG, "url.getPort", "bad url");

	if (port != NULL) {
		size_t len = authority_end - port;

		*retval = GDKmalloc(len + 1);
		if (*retval != NULL) {
			strncpy(*retval, port, len);
			(*retval)[len] = 0;
		}
	} else {
		*retval = GDKstrdup(str_nil);
	}

	if (*retval == NULL)
		throw(MAL, "url.getPort", "Allocation failed");
	return MAL_SUCCEED;
}
/*
 * COMMAND "getBasename": Extract the base of the last file name of the URL,
 * thus, excluding the file extension.
 * SIGNATURE: getBasename(str) : str;
 *
 * The base name runs from the basename position reported by skip_path()
 * up to the extension position when one is reported, otherwise up to
 * the end of the path.  If no basename is reported, a copy of str_nil
 * is returned.
 *
 * On success *retval holds a newly allocated string.  An ILLARG
 * exception is thrown for a missing or unparsable URL, a MAL exception
 * when the allocation fails.
 */
str
URLgetBasename(str *retval, url *val)
{
	const char *cursor;
	const char *base = NULL;
	const char *ext = NULL;

	if (val == NULL || *val == NULL)
		throw(ILLARG, "url.getBasename", "url missing");

	cursor = skip_scheme(*val);
	if (cursor != NULL)
		cursor = skip_authority(cursor, NULL, NULL, NULL, NULL);
	if (cursor != NULL)
		cursor = skip_path(cursor, &base, &ext);
	if (cursor == NULL)
		throw(ILLARG, "url.getBasename", "bad url");

	if (base != NULL) {
		/* stop at the extension if there is one, else at the path end */
		const char *stop = (ext != NULL) ? ext : cursor;
		size_t len = stop - base;

		*retval = GDKmalloc(len + 1);
		if (*retval != NULL) {
			strncpy(*retval, base, len);
			(*retval)[len] = 0;
		}
	} else {
		*retval = GDKstrdup(str_nil);
	}

	if (*retval == NULL)
		throw(MAL, "url.getBasename", "Allocation failed");
	return MAL_SUCCEED;
}
/*
 * COMMAND "getExtension": Extract the file extension of the URL
 * SIGNATURE: getExtension(str) : str;
 *
 * Returns the text after the '.' at the extension position reported by
 * skip_path(), up to the end of the path.  If no extension is reported,
 * a copy of str_nil is returned.
 *
 * On success *retval holds a newly allocated string.  An ILLARG
 * exception is thrown for a missing or unparsable URL, a MAL exception
 * when the allocation fails.
 */
str
URLgetExtension(str *retval, url *val)
{
	const char *path_end;
	const char *dot = NULL;

	if (val == NULL || *val == NULL)
		throw(ILLARG, "url.getExtension", "url missing");

	path_end = skip_scheme(*val);
	if (path_end == NULL)
		throw(ILLARG, "url.getExtension", "bad url");
	path_end = skip_authority(path_end, NULL, NULL, NULL, NULL);
	if (path_end == NULL)
		throw(ILLARG, "url.getExtension", "bad url");
	path_end = skip_path(path_end, NULL, &dot);
	if (path_end == NULL)
		throw(ILLARG, "url.getExtension", "bad url");

	if (dot != NULL) {
		/* span covers the '.' plus the extension; the '.' slot is reused for the NUL */
		size_t span = path_end - dot;

		assert(*dot == '.');
		*retval = GDKmalloc(span);
		if (*retval != NULL) {
			strncpy(*retval, dot + 1, span - 1);
			(*retval)[span - 1] = 0;
		}
	} else {
		*retval = GDKstrdup(str_nil);
	}

	if (*retval == NULL)
		throw(MAL, "url.getExtension", "Allocation failed");
	return MAL_SUCCEED;
}
/*
 * Return non-zero when skipping a scheme and then an authority moves
 * past the start of path, i.e. when path begins with at least one of
 * those components as recognised by skip_scheme()/skip_authority().
 */
int
xps_url_is_remote(char *path)
{
	char *after_scheme = skip_scheme(path);
	char *after_authority = skip_authority(after_scheme);

	return after_authority != path;
}
static char * xps_clean_path(char *name) { char *p, *q, *dotdot, *start; int rooted; start = skip_scheme(name); start = skip_authority(start); rooted = start[0] == '/'; /* * invariants: * p points at beginning of path element we're considering. * q points just past the last path element we wrote (no slash). * dotdot points just past the point where .. cannot backtrack * any further (no slash). */ p = q = dotdot = start + rooted; while (*p) { if(p[0] == '/') /* null element */ p++; else if (p[0] == '.' && SEP(p[1])) p += 1; /* don't count the separator in case it is nul */ else if (p[0] == '.' && p[1] == '.' && SEP(p[2])) { p += 2; if (q > dotdot) /* can backtrack */ { while(--q > dotdot && *q != '/') ; } else if (!rooted) /* /.. is / but ./../ is .. */ { if (q != start) *q++ = '/'; *q++ = '.'; *q++ = '.'; dotdot = q; } } else /* real path element */ { if (q != start+rooted) *q++ = '/'; while ((*q = *p) != '/' && *q != 0) p++, q++; } } if (q == start) /* empty string is really "." */ *q++ = '.'; *q = '\0'; return name; }
/*
 * COMMAND "getRobotURL": Extract the location of the robot control file
 * SIGNATURE: getRobotURL(str) : str;
 *
 * Copies the leading part of the URL up to the end of the authority (as
 * located by skip_scheme() and skip_authority()) and appends
 * "/robots.txt" to it.
 *
 * On success *retval holds a newly allocated string.  An ILLARG
 * exception is thrown for a missing or unparsable URL, a MAL exception
 * when the allocation fails.  Exceptions are reported under this
 * command's own name, "url.getRobotURL".
 */
str
URLgetRobotURL(str *retval, url *val)
{
	const char *s;
	size_t l;
	size_t size;

	if (val == NULL || *val == NULL)
		throw(ILLARG, "url.getRobotURL", "url missing");
	if ((s = skip_scheme(*val)) == NULL ||
	    (s = skip_authority(s, NULL, NULL, NULL, NULL)) == NULL)
		throw(ILLARG, "url.getRobotURL", "bad url");

	/* length of the "scheme + authority" prefix */
	l = s - *val;
	/* sizeof of the literal already accounts for the terminating NUL */
	size = l + sizeof("/robots.txt");
	if ((*retval = GDKmalloc(size)) == NULL)
		throw(MAL, "url.getRobotURL", "Allocation failed");

	/* bounded write: never more than the bytes allocated above */
	snprintf(*retval, size, "%.*s/robots.txt", (int) l, *val);
	return MAL_SUCCEED;
}
/*
 * Resolve path against base_uri and write the cleaned result to output.
 *
 * output:      destination buffer.
 * base_uri:    prefix used when path is relative.
 * path:        the reference to resolve.
 * output_size: capacity of output in bytes, passed to fz_strlcpy/fz_strlcat.
 *
 * If path starts with a scheme/authority (as recognised by skip_scheme()
 * and skip_authority()) or with '/', it is copied as is.  Otherwise it
 * is appended to base_uri, with a single '/' inserted between the two
 * when base_uri does not already end in one.  The result is then
 * normalised by xps_clean_path().
 */
void
xps_resolve_url(char *output, char *base_uri, char *path, int output_size)
{
	char *p = skip_authority(skip_scheme(path));

	if (p != path || path[0] == '/')
	{
		fz_strlcpy(output, path, output_size);
	}
	else
	{
		int len = fz_strlcpy(output, base_uri, output_size);

		/*
		 * fz_strlcpy is documented to follow strlcpy semantics and return
		 * the length of the source, which exceeds what was stored when
		 * base_uri is truncated.  Clamp to the stored length so the
		 * look-back below stays inside the buffer.
		 */
		if (len >= output_size)
			len = output_size > 0 ? output_size - 1 : 0;

		if (len == 0 || output[len-1] != '/')
			fz_strlcat(output, "/", output_size);
		fz_strlcat(output, path, output_size);
	}
	xps_clean_path(output);
}
/*
 * COMMAND "getAnchor": Extract an anchor (reference) from the URL
 * SIGNATURE: getAnchor(url) : str;
 *
 * After skipping scheme, authority, path and search part, the anchor is
 * everything that follows a '#' at that position.  If there is no '#'
 * there, a copy of str_nil is returned.
 *
 * On success *retval holds a newly allocated string.  An ILLARG
 * exception is thrown for a missing or unparsable URL, a MAL exception
 * when the allocation fails.
 */
str
URLgetAnchor(str *retval, url *val)
{
	const char *cursor;
	const char *anchor;

	if (val == NULL || *val == NULL)
		throw(ILLARG, "url.getAnchor", "url missing");

	cursor = skip_scheme(*val);
	if (cursor != NULL)
		cursor = skip_authority(cursor, NULL, NULL, NULL, NULL);
	if (cursor != NULL)
		cursor = skip_path(cursor, NULL, NULL);
	if (cursor != NULL)
		cursor = skip_search(cursor);
	if (cursor == NULL)
		throw(ILLARG, "url.getAnchor", "bad url");

	/* the anchor text starts right after the '#' */
	anchor = (*cursor == '#') ? cursor + 1 : str_nil;

	*retval = GDKstrdup(anchor);
	if (*retval == NULL)
		throw(MAL, "url.getAnchor", "Allocation failed");
	return MAL_SUCCEED;
}
/*
 * COMMAND "getContext": Extract the path context from the URL
 * SIGNATURE: getContext(str) : str;
 *
 * Returns the text between the end of the authority and the end of the
 * path (as located by skip_authority() and skip_path()).  If that span
 * is empty, a copy of str_nil is returned.
 *
 * On success *retval holds a newly allocated string.  An ILLARG
 * exception is thrown for a missing or unparsable URL, a MAL exception
 * when the allocation fails.
 */
str
URLgetContext(str *retval, url *val)
{
	const char *after_scheme;
	const char *path_start;
	const char *path_end;

	if (val == NULL || *val == NULL)
		throw(ILLARG, "url.getContext", "url missing");

	after_scheme = skip_scheme(*val);
	if (after_scheme == NULL)
		throw(ILLARG, "url.getContext", "bad url");
	path_start = skip_authority(after_scheme, NULL, NULL, NULL, NULL);
	if (path_start == NULL)
		throw(ILLARG, "url.getContext", "bad url");
	path_end = skip_path(path_start, NULL, NULL);
	if (path_end == NULL)
		throw(ILLARG, "url.getContext", "bad url");

	if (path_start != path_end) {
		size_t len = path_end - path_start;

		*retval = GDKmalloc(len + 1);
		if (*retval != NULL) {
			strncpy(*retval, path_start, len);
			(*retval)[len] = 0;
		}
	} else {
		*retval = GDKstrdup(str_nil);
	}

	if (*retval == NULL)
		throw(MAL, "url.getContext", "Allocation failed");
	return MAL_SUCCEED;
}