InputSource SourceFromStream(const char8 *description, FILE *file) { Entity e; e = NewExternalEntity("",0,description,0,0); if(!strchr8(description, '/')) EntitySetBaseURL(e, default_base_url()); return NewInputSource(e, MakeFILE16FromFILE(file, "r")); }
/*
 * Build an input source on top of an existing FILE16.
 *
 * description  text passed to NewExternalEntity() as the entity's
 *              system identifier argument
 * file16       the FILE16 handed to NewInputSource()
 *
 * When the description has no '/' in it, the entity is given the default
 * base URL; the temporary string obtained from default_base_url() is
 * freed again before returning.
 *
 * Returns whatever NewInputSource() returns for the new entity.
 */
InputSource SourceFromFILE16(const char8 *description, FILE16 *file16)
{
    Entity entity = NewExternalEntity(0, 0, description, 0, 0);

    if(strchr8(description, '/') == 0)
    {
        char8 *fallback_base = default_base_url();

        EntitySetBaseURL(entity, fallback_base);
        Free(fallback_base);
    }

    return NewInputSource(entity, file16);
}
char *url_merge(const char *url, const char *base, char **_scheme, char **_host, int *_port, char **_path) { char *merged_scheme, *merged_host, *merged_path, *merged_url; char *scheme=0, *host=0, *path=0; char *base_scheme=0, *base_host=0, *base_path=0; char *default_base=0; int port, base_port, merged_port, i, j; char *p; /* First see if we have an absolute URL */ parse_url(url, &scheme, &host, &port, &path); if(scheme && (host || *path == '/')) { merged_scheme = scheme; merged_host = host; merged_port = port; merged_path = path; merged_url = strdup8(url); goto ok; } /* Relative URL, so we need the base URL */ if(!base) base = default_base = default_base_url(); parse_url(base, &base_scheme, &base_host, &base_port, &base_path); if(base_scheme && (base_host || *base_path == '/')) ; else { LT_ERROR1(LEFILE, "Error: bad base URL <%s>\n", base); goto bad; } /* Determine merged path */ if(path[0] == '/') { /* not relative, use as-is */ merged_path = path; path = 0; } else { /* relative, append to base path */ merged_path = Malloc(strlen(base_path) + strlen(path) + 1); strcpy(merged_path, base_path); /* strip last component of base */ for(i=strlen(merged_path)-1; i>=0 && merged_path[i] != '/'; i--) merged_path[i] = '\0'; /* append relative path */ strcat(merged_path, path); /* Remove . and .. components from path */ p = merged_path; for(i=0; p[i]; ) { assert(p[i] == '/'); /* find next segment */ for(j=i+1; p[j] && p[j] != '/'; j++) ; /* Do we have "." ? */ if(j - i == 2 && p[i+1] == '.') { strcpy(&p[i+1], p[j] ? &p[j+1] : &p[j]); continue; } /* Do we have "<segment>/.." with <segment> != ".." ? */ /* (We know we're not looking at "./" so we don't have to * worry about "./..") */ if(p[j] == '/' && p[j+1] == '.' && p[j+2] == '.' && (p[j+3] == '/' || p[j+3] == '\0') && (j - i != 3 || p[i+1] != '.' || p[i+2] != '.')) { strcpy(&p[i+1], p[j+3] ? 
&p[j+4] : &p[j+3]); i = 0; /* start again from beginning */ continue; } /* move to next segment */ i = j; } } /* Check for deviant relative URLs like file:foo */ if(scheme && !host && *path != '/') { if(strcmp(scheme, base_scheme) == 0) { WARN1(LEFILE, "Warning: relative URL <%s> contains scheme, contrary to RFC 1808\n", url); } else { LT_ERROR2(LEFILE, "Error: relative URL <%s> has scheme different from base <%s>\n", url, base); goto bad; } } /* Return the parts and the whole thing */ merged_scheme = base_scheme; if(scheme) Free(scheme); if(host) { merged_host = host; Free(base_host); merged_port = port; } else { merged_host = base_host; merged_port = base_port; } Free(path); Free(base_path); merged_url = Malloc(strlen(merged_scheme) + 1 + (merged_host ? 2 + strlen(merged_host) + 10 : 0) + strlen(merged_path) + 1); if(merged_host) { if(merged_port == -1) sprintf(merged_url, "%s://%s%s", merged_scheme, merged_host, merged_path); else sprintf(merged_url, "%s://%s:%d%s", merged_scheme, merged_host, merged_port, merged_path); } else sprintf(merged_url, "%s:%s", merged_scheme, merged_path); ok: Free(default_base); if(_scheme) *_scheme = merged_scheme; else Free(merged_scheme); if(_host) *_host = merged_host; else Free(merged_host); if(_port) *_port = merged_port; if(_path) *_path = merged_path; else Free(merged_path); return merged_url; bad: Free(default_base); Free(scheme); Free(host); Free(path); Free(base_scheme); Free(base_host); Free(base_path); return NULL; }