/*
 * Extract the next word from *line, using the delimiters listed in
 * urlword_dlm.
 *
 * The delimiters are tried in the order they appear in urlword_dlm, not in
 * the order they appear in the input: the first delimiter that occurs
 * anywhere in *line decides where the word ends.  The word is copied into
 * pool p, and *line is advanced past the word and past any run of that same
 * delimiter character.
 *
 * If none of the delimiters occurs, the whole remaining string is returned
 * and *line is left pointing at its terminating NUL.
 *
 * Returns NULL only when urlword_dlm is empty.
 */
static char *my_urlword(pool *p, const char **line)
{
    int idx = 0;

    while (urlword_dlm[idx]) {
        int delim = urlword_dlm[idx];
        char *hit = strchr(*line, delim);

        if (hit) {
            /* Found this delimiter: the word is everything before it. */
            char *word = ap_pstrndup(p, *line, hit - *line);

            /* Swallow consecutive occurrences of the same delimiter. */
            for (; *hit == delim; ++hit) {
                /* nothing */
            }
            *line = hit;
            return word;
        }

        if (!urlword_dlm[idx + 1]) {
            /* Last delimiter and still no match: take the rest of the line. */
            int rest = strlen(*line);
            char *word = ap_pstrndup(p, *line, rest);

            *line += rest;
            return word;
        }

        ++idx;
    }

    return NULL;
}
/* Parse a "host:port" string, as used by "CONNECT host:port HTTP/1.0".
 *
 * See the INTERNET-DRAFT document "Tunneling SSL Through a WWW Proxy"
 * currently at http://www.mcom.com/newsref/std/tunneling_ssl.html
 * for the format of the request.
 *
 * p        - pool from which all strings stored in *uptr are allocated
 * hostinfo - points at the first character of the hostname; a colon and a
 *            port must follow
 * uptr     - structure to fill in; it is zeroed first, then hostinfo,
 *            hostname, port_str and port are set as far as parsing gets
 *
 * Returns HTTP_OK when a colon is present and everything after it is
 * consumed by the base-10 conversion; HTTP_BAD_REQUEST when the colon is
 * missing, the port is empty, or trailing characters remain.
 */
API_EXPORT(int) ap_parse_hostinfo_components(pool *p, const char *hostinfo,
                                             uri_components *uptr)
{
    const char *colon;
    const char *port_text;
    char *parse_end;

    /* Start from a clean structure: the parsers may be invoked more than
     * once per request.
     */
    memset(uptr, '\0', sizeof(*uptr));
    uptr->is_initialized = 1;
    uptr->hostinfo = ap_pstrdup(p, hostinfo);

    /* The port is mandatory and is introduced by the first colon. */
    colon = strchr(hostinfo, ':');
    if (colon == NULL) {
        return HTTP_BAD_REQUEST;
    }

    uptr->hostname = ap_pstrndup(p, hostinfo, colon - hostinfo);

    port_text = colon + 1;
    uptr->port_str = ap_pstrdup(p, port_text);

    if (*port_text == '\0') {
        /* "host:" with nothing after the colon */
        return HTTP_BAD_REQUEST;
    }

    uptr->port = (unsigned short) ap_strtol(uptr->port_str, &parse_end, 10);

    /* Anything left unconverted means invalid characters after ':' */
    return (*parse_end == '\0') ? HTTP_OK : HTTP_BAD_REQUEST;
}
/*
 * Setter for the connection's auth type.
 *
 * self - Ruby object wrapping a conn_rec
 * val  - must be a Ruby String (Check_Type enforces T_STRING)
 *
 * The string's bytes are copied into the connection's pool and stored in
 * ap_auth_type.  Returns val unchanged.
 */
static VALUE connection_set_auth_type(VALUE self, VALUE val)
{
    conn_rec *c;
    char *bytes;
    long nbytes;

    Data_Get_Struct(self, conn_rec, c);
    Check_Type(val, T_STRING);

    bytes = RSTRING(val)->ptr;
    nbytes = RSTRING(val)->len;
    c->ap_auth_type = ap_pstrndup(c->pool, bytes, nbytes);

    return val;
}
/*
 * Send the response headers for an ISAPI request.
 *
 * cid     - per-connection ISAPI context (request, extension control block)
 * stat    - status text, or NULL/empty to default to "Status: 200 OK"
 * head    - additional header text, or NULL/empty to default to "\r\n"
 * statlen - number of characters of stat to use
 * headlen - number of characters of head to use
 *
 * A supplied status is prefixed with "Status: " in a pool-allocated copy.
 * A supplied header block is duplicated only when the byte at
 * head[headlen] is not NUL.  Both strings are then handed to
 * ap_scan_script_header_err_strs(), the headers are sent, and any text
 * left over after the headers is written to the client.
 *
 * Returns FALSE if the header scan yielded HTTP_INTERNAL_SERVER_ERROR,
 * TRUE otherwise.
 */
static BOOL SendResponseHeaderEx(isapi_cid *cid, const char *stat,
                                 const char *head, DWORD statlen,
                                 DWORD headlen)
{
    int termarg;
    char *termch;

    if (stat && statlen != 0 && *stat) {
        /* "Status: " (8 chars) + statlen chars + terminating NUL */
        char *prefixed = ap_palloc(cid->r->pool, statlen + 9);

        strcpy(prefixed, "Status: ");
        ap_cpystrn(prefixed + 8, stat, statlen + 1);
        stat = prefixed;
    }
    else {
        stat = "Status: 200 OK";
    }

    if (head && headlen != 0 && *head) {
        if (head[headlen] != '\0') {
            /* Caller's buffer is not NUL terminated at headlen: copy it */
            head = ap_pstrndup(cid->r->pool, head, headlen);
        }
    }
    else {
        head = "\r\n";
    }

    /* Parse them out, or die trying */
    cid->status = ap_scan_script_header_err_strs(cid->r, NULL, &termch,
                                                 &termarg, stat, head, NULL);
    cid->ecb->dwHttpStatusCode = cid->r->status;

    /* All the headers should be set now */
    ap_send_http_header(cid->r);

    /* Any data left should now be sent directly; it may be raw if headlen
     * was provided.
     * NOTE(review): termarg == 1 presumably means the scan stopped inside
     * the `head` argument -- confirm against ap_scan_script_header_err_strs.
     */
    if (termch && (termarg == 1)) {
        if (headlen == -1 && *termch) {
            ap_rputs(termch, cid->r);
        }
        else if (headlen > (size_t) (termch - head)) {
            ap_rwrite(termch, headlen - (termch - head), cid->r);
        }
    }

    return cid->status != HTTP_INTERNAL_SERVER_ERROR;
}
/*
 * Return the script-name portion of the request URI.
 *
 * With no path_info (NULL or empty) this is r->uri itself, not a copy.
 * Otherwise it is a pool-allocated copy of r->uri truncated at the offset
 * reported by ap_find_path_info().
 */
char *ApacheRequest_script_name(ApacheRequest *req)
{
    request_rec *r = req->r;

    if (!r->path_info || !*r->path_info) {
        return r->uri;
    }

    return ap_pstrndup(r->pool, r->uri,
                       ap_find_path_info(r->uri, r->path_info));
}
/*
 * Return a pool-allocated, canonical-case version of szFile (OS/2).
 *
 * pPool  - pool used for every returned string
 * szFile - file name using forward slashes
 *
 * A trailing '/' is dropped when the name is longer than three characters.
 * A name starting with "//" and containing no further '/' ("//" or
 * "//server") is returned as is.  Otherwise DosQueryPathInfo() is asked for
 * the full name, whose backslashes are converted to forward slashes.
 *
 * If the query fails, a copy of the original szFile is returned; the
 * failure is logged unless the error is ERROR_INVALID_NAME.
 */
API_EXPORT(char *)ap_os_case_canonical_filename(pool *pPool, const char *szFile)
{
    char full_name[CCHMAXPATH];
    char *work;
    char *cursor;
    int name_len;
    int rc;

    name_len = strlen(szFile);
    work = ap_pstrndup(pPool, szFile, name_len);

    /* Remove trailing slash unless it's a root directory */
    if (name_len > 3 && work[name_len - 1] == '/') {
        work[name_len - 1] = '\0';
    }

    /* A UNC path: allow // or //server without querying the file system */
    if (work[0] == '/' && work[1] == '/' && strchr(work + 2, '/') == NULL) {
        return ap_pstrdup(pPool, work);
    }

    rc = DosQueryPathInfo(work, FIL_QUERYFULLNAME, full_name,
                          sizeof(full_name));
    if (rc != 0) {
        if (rc != ERROR_INVALID_NAME) {
            ap_log_error(APLOG_MARK, APLOG_ERR|APLOG_NOERRNO, NULL,
                         "OS/2 error %d for file %s", rc, szFile);
        }
        return ap_pstrdup(pPool, szFile);
    }

    /* Switch backslashes to forward */
    cursor = full_name;
    while (*cursor) {
        if (*cursor == '\\') {
            *cursor = '/';
        }
        ++cursor;
    }

    return ap_pstrdup(pPool, full_name);
}
/* Extract the Request-URI from the original request line.
 *
 * The request line is scanned as "<method> <whitespace> <uri> ...": the
 * first run of non-whitespace is skipped, then the whitespace after it, and
 * the next run of non-whitespace is returned.
 *
 * The result is always a new string from the request pool; it is "" when
 * there is no request line or no second token.
 */
static char *original_uri(request_rec *r)
{
    char *start;
    char *end;

    if (r->the_request == NULL) {
        /* one zeroed byte == empty string */
        return (char *) ap_pcalloc(r->pool, 1);
    }

    /* skip over the method */
    for (start = r->the_request; *start && !ap_isspace(*start); ++start) {
        /* nothing */
    }

    /* and the space(s) that follow it */
    for (; ap_isspace(*start); ++start) {
        /* nothing */
    }

    /* the URI ends at the next whitespace or at end of string */
    for (end = start; *end && !ap_isspace(*end); ++end) {
        /* nothing */
    }

    return ap_pstrndup(r->pool, start, end - start);
}
/* XXX: Is there still an O(n^2) attack possible here?  Please detail. */

/*
 * ISAPI ServerSupportFunction callback.
 *
 * hConn         - connection handle; it is the isapi_cid for this request
 * dwHSERequest  - numeric HSE_REQ_* code selecting the operation
 * lpvBuffer     - request-specific input and/or output buffer
 * lpdwSize      - request-specific size (in/out for the URL mapping calls)
 * lpdwDataType  - request-specific extra argument
 *
 * Returns TRUE when the request was handled.  Unsupported requests
 * (optionally logged when LogNotSupported is set) call
 * SetLastError(ERROR_INVALID_PARAMETER) and return FALSE.
 */
BOOL WINAPI ServerSupportFunction(HCONN hConn, DWORD dwHSERequest,
                                  LPVOID lpvBuffer, LPDWORD lpdwSize,
                                  LPDWORD lpdwDataType)
{
    isapi_cid *cid = (isapi_cid *)hConn;
    request_rec *r = cid->r;
    request_rec *subreq;

    switch (dwHSERequest) {
    case 1: /* HSE_REQ_SEND_URL_REDIRECT_RESP */
        /* Set the status to be returned when the HttpExtensionProc()
         * is done.
         * WARNING: Microsoft now advertises HSE_REQ_SEND_URL_REDIRECT_RESP
         *          and HSE_REQ_SEND_URL as equivalent per the Jan 2000 SDK.
         *          They most definitely are not, even in their own samples.
         */
        ap_table_set(r->headers_out, "Location", lpvBuffer);
        cid->status = cid->r->status
                    = cid->ecb->dwHttpStatusCode = HTTP_MOVED_TEMPORARILY;
        return TRUE;

    case 2: /* HSE_REQ_SEND_URL */
        /* Soak up remaining input (there should be none) */
        if (r->remaining > 0) {
            char argsbuffer[HUGE_STRING_LEN];

            while (ap_get_client_block(r, argsbuffer, HUGE_STRING_LEN) > 0) {
                /* discard */
            }
        }

        /* Reset the method to GET */
        r->method = ap_pstrdup(r->pool, "GET");
        r->method_number = M_GET;

        /* Don't let anyone think there's still data */
        ap_table_unset(r->headers_in, "Content-Length");

        /* AV fault per PR3598 - redirected path is lost!
         * Copy the path into the request pool before redirecting.  The
         * copy is assigned to the LPVOID parameter directly; assigning
         * through a cast is not valid ISO C.
         */
        lpvBuffer = ap_pstrdup(r->pool, (char*)lpvBuffer);
        ap_internal_redirect((char*)lpvBuffer, r);
        return TRUE;

    case 3: /* HSE_REQ_SEND_RESPONSE_HEADER */
    {
        /* Parse them out, or die trying.  Both strings are NUL terminated
         * here, so their lengths are simply measured.
         */
        DWORD statlen = 0, headlen = 0;

        if (lpvBuffer)
            statlen = strlen((char*) lpvBuffer);
        if (lpdwDataType)
            headlen = strlen((char*) lpdwDataType);
        return SendResponseHeaderEx(cid, (char*) lpvBuffer,
                                    (char*) lpdwDataType,
                                    statlen, headlen);
    }

    case 4: /* HSE_REQ_DONE_WITH_SESSION */
        /* Do nothing... since we don't support async I/O, they'll
         * return from HttpExtensionProc soon
         */
        return TRUE;

    case 1001: /* HSE_REQ_MAP_URL_TO_PATH */
    {
        /* Map a URL to a filename; the result overwrites the caller's
         * buffer and *lpdwSize receives its length.
         */
        char *file = (char *)lpvBuffer;
        DWORD len;

        subreq = ap_sub_req_lookup_uri(ap_pstrndup(r->pool, file, *lpdwSize),
                                       r);

        len = ap_cpystrn(file, subreq->filename, *lpdwSize) - file;

        /* IIS puts a trailing slash on directories, Apache doesn't */
        if (S_ISDIR (subreq->finfo.st_mode)) {
            /* Need room for the slash and the NUL.  Written as len + 1 so
             * that a zero *lpdwSize cannot wrap the unsigned subtraction
             * and let us write past an empty buffer.
             */
            if (len + 1 < *lpdwSize) {
                file[len++] = '\\';
                file[len] = '\0';
            }
        }
        *lpdwSize = len;
        return TRUE;
    }

    case 1002: /* HSE_REQ_GET_SSPI_INFO */
        if (LogNotSupported)
            ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_WARNING, r,
                          "ISAPI ServerSupportFunction HSE_REQ_GET_SSPI_INFO "
                          "is not supported: %s", r->filename);
        SetLastError(ERROR_INVALID_PARAMETER);
        return FALSE;

    case 1003: /* HSE_APPEND_LOG_PARAMETER */
        /* Log lpvBuffer, of lpdwSize bytes, in the URI Query (cs-uri-query)
         * field.  This code will do for now...
         */
        ap_table_set(r->notes, "isapi-parameter", (char*) lpvBuffer);
        if (AppendLogToQuery) {
            if (r->args)
                r->args = ap_pstrcat(r->pool, r->args, (char*) lpvBuffer,
                                     NULL);
            else
                r->args = ap_pstrdup(r->pool, (char*) lpvBuffer);
        }
        if (AppendLogToErrors)
            ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_INFO, r,
                          "ISAPI %s: %s", cid->r->filename,
                          (char*) lpvBuffer);
        return TRUE;

    /* We don't support all this async I/O, Microsoft-specific stuff */
    case 1005: /* HSE_REQ_IO_COMPLETION */
    case 1006: /* HSE_REQ_TRANSMIT_FILE */
        if (LogNotSupported)
            ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_WARNING, r,
                          "ISAPI asynchronous I/O not supported: %s",
                          r->filename);
        SetLastError(ERROR_INVALID_PARAMETER);
        return FALSE;

    case 1007: /* HSE_REQ_REFRESH_ISAPI_ACL */
        /* Since we don't override the user ID and access, we can't reset. */
        if (LogNotSupported)
            ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_WARNING, r,
                          "ISAPI ServerSupportFunction "
                          "HSE_REQ_REFRESH_ISAPI_ACL "
                          "is not supported: %s", r->filename);
        SetLastError(ERROR_INVALID_PARAMETER);
        return FALSE;

    case 1008: /* HSE_REQ_IS_KEEP_CONN */
        *((LPBOOL) lpvBuffer) = (r->connection->keepalive == 1);
        return TRUE;

    case 1010: /* HSE_REQ_ASYNC_READ_CLIENT */
        if (LogNotSupported)
            ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_WARNING, r,
                          "ISAPI asynchronous I/O not supported: %s",
                          r->filename);
        SetLastError(ERROR_INVALID_PARAMETER);
        return FALSE;

    case 1011: /* HSE_REQ_GET_IMPERSONATION_TOKEN  Added in ISAPI 4.0 */
        if (LogNotSupported)
            ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_WARNING, r,
                          "ISAPI ServerSupportFunction "
                          "HSE_REQ_GET_IMPERSONATION_TOKEN "
                          "is not supported: %s", r->filename);
        SetLastError(ERROR_INVALID_PARAMETER);
        return FALSE;

#ifdef HSE_REQ_MAP_URL_TO_PATH_EX
    case 1012: /* HSE_REQ_MAP_URL_TO_PATH_EX */
    {
        /* Map a URL to a filename */
        LPHSE_URL_MAPEX_INFO info = (LPHSE_URL_MAPEX_INFO) lpdwDataType;
        char* test_uri = ap_pstrndup(r->pool, (char *)lpvBuffer, *lpdwSize);

        subreq = ap_sub_req_lookup_uri(test_uri, r);
        info->cchMatchingURL = strlen(test_uri);
        info->cchMatchingPath = ap_cpystrn(info->lpszPath, subreq->filename,
                                           MAX_PATH) - info->lpszPath;

        /* Mapping started with assuming both strings matched.
         * Now roll on the path_info as a mismatch and handle
         * terminating slashes for directory matches.
         */
        if (subreq->path_info && *subreq->path_info) {
            ap_cpystrn(info->lpszPath + info->cchMatchingPath,
                       subreq->path_info, MAX_PATH - info->cchMatchingPath);
            info->cchMatchingURL -= strlen(subreq->path_info);
            if (S_ISDIR(subreq->finfo.st_mode)
                 && info->cchMatchingPath < MAX_PATH - 1) {
                /* roll forward over path_info's first slash */
                ++info->cchMatchingPath;
                ++info->cchMatchingURL;
            }
        }
        else if (S_ISDIR(subreq->finfo.st_mode)
                 && info->cchMatchingPath < MAX_PATH - 1) {
            /* Add a trailing slash for directory */
            info->lpszPath[info->cchMatchingPath++] = '/';
            info->lpszPath[info->cchMatchingPath] = '\0';
        }

        /* If the matched isn't a file, roll match back to the prior slash */
        if (!subreq->finfo.st_mode) {
            while (info->cchMatchingPath && info->cchMatchingURL) {
                if (info->lpszPath[info->cchMatchingPath - 1] == '/')
                    break;
                --info->cchMatchingPath;
                --info->cchMatchingURL;
            }
        }

        /* Paths returned with back slashes */
        for (test_uri = info->lpszPath; *test_uri; ++test_uri)
            if (*test_uri == '/')
                *test_uri = '\\';

        /* info->dwFlags is a combination of:
         * HSE_URL_FLAGS_READ         0x001 Allow read
         * HSE_URL_FLAGS_WRITE        0x002 Allow write
         * HSE_URL_FLAGS_EXECUTE      0x004 Allow execute
         * HSE_URL_FLAGS_SSL          0x008 Require SSL
         * HSE_URL_FLAGS_DONT_CACHE   0x010 Don't cache (VRoot only)
         * HSE_URL_FLAGS_NEGO_CERT    0x020 Allow client SSL cert
         * HSE_URL_FLAGS_REQUIRE_CERT 0x040 Require client SSL cert
         * HSE_URL_FLAGS_MAP_CERT     0x080 Map client SSL cert to account
         * HSE_URL_FLAGS_SSL128       0x100 Require 128-bit SSL cert
         * HSE_URL_FLAGS_SCRIPT       0x200 Allow script execution
         *
         * XxX: As everywhere, EXEC flags could use some work...
         *      and this could go further with more flags, as desired.
         */
        info->dwFlags = (subreq->finfo.st_mode & _S_IREAD  ? 0x001 : 0)
                      | (subreq->finfo.st_mode & _S_IWRITE ? 0x002 : 0)
                      | (subreq->finfo.st_mode & _S_IEXEC  ? 0x204 : 0);
        return TRUE;
    }
#endif

    case 1014: /* HSE_REQ_ABORTIVE_CLOSE */
        if (LogNotSupported)
            ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_WARNING, r,
                          "ISAPI ServerSupportFunction HSE_REQ_ABORTIVE_CLOSE"
                          " is not supported: %s", r->filename);
        SetLastError(ERROR_INVALID_PARAMETER);
        return FALSE;

    case 1015: /* HSE_REQ_GET_CERT_INFO_EX  Added in ISAPI 4.0 */
        if (LogNotSupported)
            ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_WARNING, r,
                          "ISAPI ServerSupportFunction "
                          "HSE_REQ_GET_CERT_INFO_EX "
                          "is not supported: %s", r->filename);
        SetLastError(ERROR_INVALID_PARAMETER);
        return FALSE;

#ifdef HSE_REQ_SEND_RESPONSE_HEADER_EX
    case 1016: /* HSE_REQ_SEND_RESPONSE_HEADER_EX  Added in ISAPI 4.0 */
    {
        LPHSE_SEND_HEADER_EX_INFO shi
                                  = (LPHSE_SEND_HEADER_EX_INFO) lpvBuffer;

        /* XXX: ignore shi->fKeepConn?  We shouldn't need the advice */
        /* r->connection->keepalive = shi->fKeepConn; */
        return SendResponseHeaderEx(cid, shi->pszStatus, shi->pszHeader,
                                    shi->cchStatus, shi->cchHeader);
    }
#endif

    case 1017: /* HSE_REQ_CLOSE_CONNECTION  Added after ISAPI 4.0 */
        if (LogNotSupported)
            ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_WARNING, r,
                          "ISAPI ServerSupportFunction "
                          "HSE_REQ_CLOSE_CONNECTION "
                          "is not supported: %s", r->filename);
        SetLastError(ERROR_INVALID_PARAMETER);
        return FALSE;

    case 1018: /* HSE_REQ_IS_CONNECTED  Added after ISAPI 4.0 */
        /* Returns True if client is connected c.f. MSKB Q188346
         * XXX: That statement is very ambiguous... assuming the
         * identical return mechanism as HSE_REQ_IS_KEEP_CONN.
         */
        *((LPBOOL) lpvBuffer) = (r->connection->aborted == 0);
        return TRUE;

    case 1020: /* HSE_REQ_EXTENSION_TRIGGER  Added after ISAPI 4.0 */
        /* Undocumented - defined by the Microsoft Jan '00 Platform SDK */
        if (LogNotSupported)
            ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_WARNING, r,
                          "ISAPI ServerSupportFunction "
                          "HSE_REQ_EXTENSION_TRIGGER "
                          "is not supported: %s", r->filename);
        SetLastError(ERROR_INVALID_PARAMETER);
        return FALSE;

    default:
        /* DWORD is not int: cast so the argument matches the %d in the
         * format string.
         */
        if (LogNotSupported)
            ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_WARNING, r,
                          "ISAPI ServerSupportFunction (%d) not supported: "
                          "%s", (int) dwHSERequest, r->filename);
        SetLastError(ERROR_INVALID_PARAMETER);
        return FALSE;
    }
}
/* parse_uri_components():
 * Parse a given URI, fill in all supplied fields of a uri_components
 * structure. This eliminates the necessity of extracting host, port,
 * path, query info repeatedly in the modules.
 *
 * Parameters:
 *  p    - pool from which every string stored in *uptr is allocated
 *  uri  - NUL-terminated URI to parse
 *  uptr - structure to fill in; it is zeroed before parsing starts
 *
 * Returns HTTP_OK on success, or HTTP_BAD_REQUEST when a port is present
 * but contains characters that the base-10 conversion does not consume.
 *
 * Side effects:
 *  - fills in fields of uri_components *uptr
 *  - none on any of the r->* fields
 *
 * Control flow: the function is built around two labels.  deal_with_path
 * parses path, query and fragment starting at `uri`; deal_with_host parses
 * host and port from [hostinfo, uri).  Once the scheme and hostinfo have
 * been consumed, `uri` is moved to the first character after the hostinfo,
 * so jumping back to deal_with_path parses the remainder.
 */
API_EXPORT(int) ap_parse_uri_components(pool *p, const char *uri, uri_components * uptr)
{
    const char *s;          /* scanning cursor */
    const char *s1;         /* secondary cursor: '#' in query, ':' in userinfo */
    const char *hostinfo;   /* start of the part currently treated as host[:port] */
    char *endstr;           /* first character not consumed by ap_strtol() */
    int port;

    /* Initialize the structure. parse_uri() and parse_uri_components()
     * can be called more than once per request.
     */
    memset(uptr, '\0', sizeof(*uptr));
    uptr->is_initialized = 1;

    /* We assume the processor has a branch predictor like most --
     * it assumes forward branches are untaken and backwards are taken.  That's
     * the reason for the gotos.  -djg
     */
    if (uri[0] == '/') {
deal_with_path:
        /* we expect uri to point to first character of path ... remember
         * that the path could be empty -- http://foobar?query for example
         */
        s = uri;
        /* advance until a character flagged NOTEND_PATH in uri_delims */
        while ((uri_delims[*(unsigned char *) s] & NOTEND_PATH) == 0) {
            ++s;
        }
        /* path stays NULL (from the memset) when it is empty */
        if (s != uri) {
            uptr->path = ap_pstrndup(p, uri, s - uri);
        }
        if (*s == 0) {
            return HTTP_OK;
        }
        if (*s == '?') {
            ++s;
            /* the query runs up to the first '#', if there is one */
            s1 = strchr(s, '#');
            if (s1) {
                uptr->fragment = ap_pstrdup(p, s1 + 1);
                uptr->query = ap_pstrndup(p, s, s1 - s);
            }
            else {
                uptr->query = ap_pstrdup(p, s);
            }
            return HTTP_OK;
        }
        /* otherwise it's a fragment */
        uptr->fragment = ap_pstrdup(p, s + 1);
        return HTTP_OK;
    }

    /* find the scheme: */
    s = uri;
    while ((uri_delims[*(unsigned char *) s] & NOTEND_SCHEME) == 0) {
        ++s;
    }
    /* scheme must be non-empty and followed by :// */
    if (s == uri || s[0] != ':' || s[1] != '/' || s[2] != '/') {
        goto deal_with_path;    /* backwards predicted taken! */
    }

    uptr->scheme = ap_pstrndup(p, uri, s - uri);
    s += 3;     /* step over "://" */
    hostinfo = s;
    while ((uri_delims[*(unsigned char *) s] & NOTEND_HOSTINFO) == 0) {
        ++s;
    }
    uri = s;    /* whatever follows hostinfo is start of uri */
    uptr->hostinfo = ap_pstrndup(p, hostinfo, uri - hostinfo);

    /* If there's a username:password@host:port, the @ we want is the last @...
     * too bad there's no memrchr()... For the C purists, note that hostinfo
     * is definitely not the first character of the original uri so therefore
     * &hostinfo[-1] < &hostinfo[0] ... and this loop is valid C.
     */
    do {
        --s;
    } while (s >= hostinfo && *s != '@');
    if (s < hostinfo) {
        /* again we want the common case to be fall through */
deal_with_host:
        /* We expect hostinfo to point to the first character of
         * the hostname.  If there's a port it is the first colon.
         */
        s = memchr(hostinfo, ':', uri - hostinfo);
        if (s == NULL) {
            /* we expect the common case to have no port */
            uptr->hostname = ap_pstrndup(p, hostinfo, uri - hostinfo);
            goto deal_with_path;
        }
        uptr->hostname = ap_pstrndup(p, hostinfo, s - hostinfo);
        ++s;
        uptr->port_str = ap_pstrndup(p, s, uri - s);
        if (uri != s) {
            /* non-empty port text: every character must convert */
            port = ap_strtol(uptr->port_str, &endstr, 10);
            uptr->port = port;
            if (*endstr == '\0') {
                goto deal_with_path;
            }
            /* Invalid characters after ':' found */
            return HTTP_BAD_REQUEST;
        }
        /* "host:" with an empty port: use the scheme's default */
        uptr->port = ap_default_port_for_scheme(uptr->scheme);
        goto deal_with_path;
    }

    /* Reached only when an '@' was found; s points at it. */
    /* first colon delimits username:password */
    s1 = memchr(hostinfo, ':', s - hostinfo);
    if (s1) {
        uptr->user = ap_pstrndup(p, hostinfo, s1 - hostinfo);
        ++s1;
        uptr->password = ap_pstrndup(p, s1, s - s1);
    }
    else {
        uptr->user = ap_pstrndup(p, hostinfo, s - hostinfo);
    }
    /* the host part starts right after the '@' */
    hostinfo = s + 1;
    goto deal_with_host;
}
API_EXPORT(void) ap_add_cgi_vars(request_rec *r) { table *e = r->subprocess_env; ap_table_setn(e, "GATEWAY_INTERFACE", "CGI/1.1"); ap_table_setn(e, "SERVER_PROTOCOL", r->protocol); ap_table_setn(e, "REQUEST_METHOD", r->method); ap_table_setn(e, "QUERY_STRING", r->args ? r->args : ""); ap_table_setn(e, "REQUEST_URI", original_uri(r)); /* Note that the code below special-cases scripts run from includes, * because it "knows" that the sub_request has been hacked to have the * args and path_info of the original request, and not any that may have * come with the script URI in the include command. Ugh. */ if (!strcmp(r->protocol, "INCLUDED")) { ap_table_setn(e, "SCRIPT_NAME", r->uri); if (r->path_info && *r->path_info) { ap_table_setn(e, "PATH_INFO", r->path_info); } } else if (!r->path_info || !*r->path_info) { ap_table_setn(e, "SCRIPT_NAME", r->uri); } else { int path_info_start = ap_find_path_info(r->uri, r->path_info); ap_table_setn(e, "SCRIPT_NAME", ap_pstrndup(r->pool, r->uri, path_info_start)); ap_table_setn(e, "PATH_INFO", r->path_info); } if (r->path_info && r->path_info[0]) { /* * To get PATH_TRANSLATED, treat PATH_INFO as a URI path. * Need to re-escape it for this, since the entire URI was * un-escaped before we determined where the PATH_INFO began. */ request_rec *pa_req; pa_req = ap_sub_req_lookup_uri(ap_escape_uri(r->pool, r->path_info), r); if (pa_req->filename) { #ifdef WIN32 char buffer[HUGE_STRING_LEN]; #endif char *pt = ap_pstrcat(r->pool, pa_req->filename, pa_req->path_info, NULL); #ifdef WIN32 /* We need to make this a real Windows path name */ GetFullPathName(pt, HUGE_STRING_LEN, buffer, NULL); ap_table_setn(e, "PATH_TRANSLATED", ap_pstrdup(r->pool, buffer)); #else ap_table_setn(e, "PATH_TRANSLATED", pt); #endif } ap_destroy_sub_req(pa_req); } }
/*
 * Fixup handler that tries to correct a misspelled last path component.
 *
 * The directory part of r->filename is scanned for entries whose names
 * resemble the requested one (case differences, the differences spdist()
 * recognises, and -- with WANT_BASENAME_MATCH -- equal base names).
 *
 * Returns:
 *  DECLINED                 - module disabled, not a GET, a proxy request,
 *                             the file exists, a sub-request, no slash in
 *                             the filename, the URI is "/", the URI and
 *                             filename pieces do not line up, or the
 *                             directory cannot be opened
 *  OK                       - no candidates, or an entry with exactly the
 *                             requested name exists
 *  HTTP_MOVED_PERMANENTLY   - a single unambiguous best candidate; the
 *                             Location header is set
 *  HTTP_MULTIPLE_CHOICES    - several candidates; the "variant-list" note
 *                             and the VARIANTS environment entry are set
 */
static int check_speling(request_rec *r)
{
    spconfig *cfg;
    char *good, *bad, *postgood, *url;
    int filoc, dotloc, urlen, pglen;
    DIR *dirp;
    struct DIR_TYPE *dir_entry;
    array_header *candidates = NULL;

    cfg = ap_get_module_config(r->per_dir_config, &speling_module);
    if (!cfg->enabled) {
        return DECLINED;
    }

    /* We only want to worry about GETs */
    if (r->method_number != M_GET) {
        return DECLINED;
    }

    /* We've already got a file of some kind or another */
    if (r->proxyreq != NOT_PROXY || (r->finfo.st_mode != 0)) {
        return DECLINED;
    }

    /* This is a sub request - don't mess with it */
    if (r->main) {
        return DECLINED;
    }

    /*
     * The request should end up looking like this:
     * r->uri: /correct-url/mispelling/more
     * r->filename: /correct-file/mispelling r->path_info: /more
     *
     * So we do this in steps. First break r->filename into two pieces
     */

    filoc = ap_rind(r->filename, '/');
    /*
     * Don't do anything if the request doesn't contain a slash, or
     * requests "/"
     */
    if (filoc == -1 || strcmp(r->uri, "/") == 0) {
        return DECLINED;
    }

    /* good = /correct-file */
    good = ap_pstrndup(r->pool, r->filename, filoc);
    /* bad = mispelling */
    bad = ap_pstrdup(r->pool, r->filename + filoc + 1);
    /* postgood = mispelling/more */
    postgood = ap_pstrcat(r->pool, bad, r->path_info, NULL);

    urlen = strlen(r->uri);
    pglen = strlen(postgood);

    /* Check to see if the URL pieces add up.  When postgood is longer than
     * the URI it cannot be a suffix of it, and r->uri + (urlen - pglen)
     * would point before the start of the string, so test the lengths
     * first.
     */
    if (pglen > urlen || strcmp(postgood, r->uri + (urlen - pglen))) {
        return DECLINED;
    }

    /* url = /correct-url */
    url = ap_pstrndup(r->pool, r->uri, (urlen - pglen));

    /* Now open the directory and do ourselves a check... */
    dirp = ap_popendir(r->pool, good);
    if (dirp == NULL) {          /* Oops, not a directory... */
        return DECLINED;
    }

    candidates = ap_make_array(r->pool, 2, sizeof(misspelled_file));

    /* dotloc = length of `bad` up to (not including) its first dot */
    dotloc = ap_ind(bad, '.');
    if (dotloc == -1) {
        dotloc = strlen(bad);
    }

    while ((dir_entry = readdir(dirp)) != NULL) {
        sp_reason q;

        /*
         * If we end up with a "fixed" URL which is identical to the
         * requested one, we must have found a broken symlink or some such.
         * Do _not_ try to redirect this, it causes a loop!
         */
        if (strcmp(bad, dir_entry->d_name) == 0) {
            ap_pclosedir(r->pool, dirp);
            return OK;
        }
        /*
         * miscapitalization errors are checked first (like, e.g., lower case
         * file, upper case request)
         */
        else if (strcasecmp(bad, dir_entry->d_name) == 0) {
            misspelled_file *sp_new;

            sp_new = (misspelled_file *) ap_push_array(candidates);
            sp_new->name = ap_pstrdup(r->pool, dir_entry->d_name);
            sp_new->quality = SP_MISCAPITALIZED;
        }
        /*
         * simple typing errors are checked next (like, e.g.,
         * missing/extra/transposed char)
         */
        else if ((q = spdist(bad, dir_entry->d_name)) != SP_VERYDIFFERENT) {
            misspelled_file *sp_new;

            sp_new = (misspelled_file *) ap_push_array(candidates);
            sp_new->name = ap_pstrdup(r->pool, dir_entry->d_name);
            sp_new->quality = q;
        }
        /*
         * The spdist() should have found the majority of the misspelled
         * requests.  It is of questionable use to continue looking for
         * files with the same base name, but potentially of totally wrong
         * type (index.html <-> index.db).
         * I would propose to not set the WANT_BASENAME_MATCH define.
         *      08-Aug-1997 <*****@*****.**>
         *
         * However, Alexei replied giving some reasons to add it anyway:
         * > Oh, by the way, I remembered why having the
         * > extension-stripping-and-matching stuff is a good idea:
         * >
         * > If you're using MultiViews, and have a file named foobar.html,
         * > which you refer to as "foobar", and someone tried to access
         * > "Foobar", mod_speling won't find it, because it won't find
         * > anything matching that spelling. With the extension-munging,
         * > it would locate "foobar.html". Not perfect, but I ran into
         * > that problem when I first wrote the module.
         */
        else {
#ifdef WANT_BASENAME_MATCH
            /*
             * Okay... we didn't find anything. Now we take out the hard-core
             * power tools. There are several cases here. Someone might have
             * entered a wrong extension (.htm instead of .html or vice
             * versa) or the document could be negotiated. At any rate, now
             * we just compare stuff before the first dot. If it matches, we
             * figure we got us a match. This can result in wrong things if
             * there are files of different content types but the same prefix
             * (e.g. foo.gif and foo.html) This code will pick the first one
             * it finds. Better than a Not Found, though.
             */
            int entloc = ap_ind(dir_entry->d_name, '.');
            if (entloc == -1) {
                entloc = strlen(dir_entry->d_name);
            }

            if ((dotloc == entloc)
                && !strncasecmp(bad, dir_entry->d_name, dotloc)) {
                misspelled_file *sp_new;

                sp_new = (misspelled_file *) ap_push_array(candidates);
                sp_new->name = ap_pstrdup(r->pool, dir_entry->d_name);
                sp_new->quality = SP_VERYDIFFERENT;
            }
#endif
        }
    }
    ap_pclosedir(r->pool, dirp);

    if (candidates->nelts != 0) {
        /* Wow... we found us a mispelling. Construct a fixed url */
        char *nuri;
        const char *ref;
        misspelled_file *variant = (misspelled_file *) candidates->elts;
        int i;

        ref = ap_table_get(r->headers_in, "Referer");

        qsort((void *) candidates->elts, candidates->nelts,
              sizeof(misspelled_file), sort_by_quality);

        /*
         * Conditions for immediate redirection:
         *     a) the first candidate was not found by stripping the suffix
         * AND b) there exists only one candidate OR the best match is not
         *        ambiguous
         * then return a redirection right away.
         */
        if (variant[0].quality != SP_VERYDIFFERENT
            && (candidates->nelts == 1
                || variant[0].quality != variant[1].quality)) {

            nuri = ap_escape_uri(r->pool, ap_pstrcat(r->pool, url,
                                                     variant[0].name,
                                                     r->path_info, NULL));
            if (r->parsed_uri.query)
                nuri = ap_pstrcat(r->pool, nuri, "?", r->parsed_uri.query,
                                  NULL);

            ap_table_setn(r->headers_out, "Location",
                          ap_construct_url(r->pool, nuri, r));

            /* the format without "from %s" simply ignores the extra ref */
            ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_INFO, r,
                          ref ? "Fixed spelling: %s to %s from %s"
                              : "Fixed spelling: %s to %s",
                          r->uri, nuri, ref);

            return HTTP_MOVED_PERMANENTLY;
        }
        /*
         * Otherwise, a "[300] Multiple Choices" list with the variants is
         * returned.
         */
        else {
            pool *p;
            table *notes;
            pool *sub_pool;
            array_header *t;    /* pieces of the HTML response text */
            array_header *v;    /* pieces of the VARIANTS list */

            if (r->main == NULL) {
                p = r->pool;
                notes = r->notes;
            }
            else {
                p = r->main->pool;
                notes = r->main->notes;
            }

            /* scratch pool for the pieces; the joined results go into p */
            sub_pool = ap_make_sub_pool(p);
            t = ap_make_array(sub_pool, candidates->nelts * 8 + 8,
                              sizeof(char *));
            v = ap_make_array(sub_pool, candidates->nelts * 5,
                              sizeof(char *));

            /* Generate the response text. */

            *(const char **)ap_push_array(t) =
                          "The document name you requested (<code>";
            *(const char **)ap_push_array(t) = ap_escape_html(sub_pool, r->uri);
            *(const char **)ap_push_array(t) =
                           "</code>) could not be found on this server.\n"
                           "However, we found documents with names similar "
                           "to the one you requested.<p>"
                           "Available documents:\n<ul>\n";

            for (i = 0; i < candidates->nelts; ++i) {
                char *vuri;
                const char *reason;

                reason = sp_reason_str[(int) (variant[i].quality)];
                /* The format isn't very neat... */
                vuri = ap_pstrcat(sub_pool, url, variant[i].name, r->path_info,
                                  (r->parsed_uri.query != NULL) ? "?" : "",
                                  (r->parsed_uri.query != NULL)
                                      ? r->parsed_uri.query : "",
                                  NULL);
                *(const char **)ap_push_array(v) = "\"";
                *(const char **)ap_push_array(v) = ap_escape_uri(sub_pool, vuri);
                *(const char **)ap_push_array(v) = "\";\"";
                *(const char **)ap_push_array(v) = reason;
                *(const char **)ap_push_array(v) = "\"";

                *(const char **)ap_push_array(t) = "<li><a href=\"";
                *(const char **)ap_push_array(t) = ap_escape_uri(sub_pool, vuri);
                *(const char **)ap_push_array(t) = "\">";
                *(const char **)ap_push_array(t) = ap_escape_html(sub_pool, vuri);
                *(const char **)ap_push_array(t) = "</a> (";
                *(const char **)ap_push_array(t) = reason;
                *(const char **)ap_push_array(t) = ")\n";

                /*
                 * when we have printed the "close matches" and there are
                 * more "distant matches" (matched by stripping the suffix),
                 * then we insert an additional separator text to suggest
                 * that the user LOOK CLOSELY whether these are really the
                 * files she wanted.
                 */
                if (i > 0 && i < candidates->nelts - 1
                    && variant[i].quality != SP_VERYDIFFERENT
                    && variant[i + 1].quality == SP_VERYDIFFERENT) {
                    *(const char **)ap_push_array(t) =
                                   "</ul>\nFurthermore, the following related "
                                   "documents were found:\n<ul>\n";
                }
            }
            *(const char **)ap_push_array(t) = "</ul>\n";

            /* If we know there was a referring page, add a note: */
            if (ref != NULL) {
                *(const char **)ap_push_array(t) =
                               "Please consider informing the owner of the "
                               "<a href=\"";
                *(const char **)ap_push_array(t) = ap_escape_uri(sub_pool, ref);
                *(const char **)ap_push_array(t) = "\">referring page</a> "
                               "about the broken link.\n";
            }

            /* Pass our table to http_protocol.c (see mod_negotiation): */
            ap_table_setn(notes, "variant-list", ap_array_pstrcat(p, t, 0));

            ap_table_mergen(r->subprocess_env, "VARIANTS",
                            ap_array_pstrcat(p, v, ','));

            ap_destroy_pool(sub_pool);

            ap_log_rerror(APLOG_MARK, APLOG_NOERRNO | APLOG_INFO, r,
                          ref ? "Spelling fix: %s: %d candidates from %s"
                              : "Spelling fix: %s: %d candidates",
                          r->uri, candidates->nelts, ref);

            return HTTP_MULTIPLE_CHOICES;
        }
    }

    return OK;
}