static xmlCharEncoding sniff_encoding(saxctxt* ctx, const char* cbuf, size_t bytes) { request_rec* r = ctx->f->r; cdn_conf* cfg = ctx->cfg; xmlCharEncoding ret; char* p; ap_regmatch_t match[2]; char* buf = (char*)cbuf; const char *encoding = 0; /* If we've got it in the HTTP headers, there's nothing to do */ if(r->content_type && (p = ap_strcasestr(r->content_type, "charset=")) && p != NULL) { p += 8; if((encoding = apr_pstrndup(r->pool, p, strcspn(p, " ;")))) { ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "sniff_encoding: found charset %s in Content-Type", encoding); ret = xmlParseCharEncoding(encoding); if(((ret != XML_CHAR_ENCODING_ERROR) && (ret != XML_CHAR_ENCODING_NONE))) return ret; } } /* to sniff, first we look for BOM */ if(encoding == NULL) { if((ret = xmlDetectCharEncoding((const xmlChar*)buf, bytes)) != XML_CHAR_ENCODING_NONE) { ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "sniff_encoding: got charset from XML rules"); return ret; } /* If none of the above, look for a META-thingey */ if(ap_regexec(seek_meta_ctype, buf, 1, match, 0) == 0) { p = apr_pstrndup(r->pool, buf + match[0].rm_so, match[0].rm_eo - match[0].rm_so); if(ap_regexec(seek_charset, p, 2, match, 0) == 0) encoding = apr_pstrndup(r->pool, p+match[1].rm_so, match[1].rm_eo - match[1].rm_so); } } /* either it's set to something we found or it's still the default */ if(encoding) { ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "sniff_encoding: got charset %s from HTML META", encoding); ret = xmlParseCharEncoding(encoding); if(ret != XML_CHAR_ENCODING_ERROR && ret != XML_CHAR_ENCODING_NONE) return ret; ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "sniff_encoding: charset %s not supported", encoding); } /* Use configuration default as a last resort */ ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "sniff_encoding: no suitable charset information"); return (cfg->default_encoding == XML_CHAR_ENCODING_NONE) ? XML_CHAR_ENCODING_8859_1 : cfg->default_encoding ; }
static apr_status_t filter_harness(ap_filter_t *f, apr_bucket_brigade *bb) { apr_status_t ret; #ifndef NO_PROTOCOL const char *cachecontrol; #endif harness_ctx *ctx = f->ctx; ap_filter_rec_t *filter = f->frec; if (f->r->status != 200 && !apr_table_get(f->r->subprocess_env, "filter-errordocs")) { ap_remove_output_filter(f); return ap_pass_brigade(f->next, bb); } filter_trace(f->c, filter->debug, f->frec->name, bb); /* look up a handler function if we haven't already set it */ if (!ctx->func) { #ifndef NO_PROTOCOL if (f->r->proxyreq) { if (filter->proto_flags & AP_FILTER_PROTO_NO_PROXY) { ap_remove_output_filter(f); return ap_pass_brigade(f->next, bb); } if (filter->proto_flags & AP_FILTER_PROTO_TRANSFORM) { cachecontrol = apr_table_get(f->r->headers_out, "Cache-Control"); if (cachecontrol) { if (ap_strcasestr(cachecontrol, "no-transform")) { ap_remove_output_filter(f); return ap_pass_brigade(f->next, bb); } } } } #endif if (!filter_lookup(f, filter)) { ap_remove_output_filter(f); return ap_pass_brigade(f->next, bb); } AP_DEBUG_ASSERT(ctx->func != NULL); } /* call the content filter with its own context, then restore our * context */ f->ctx = ctx->fctx; ret = ctx->func(f, bb); ctx->fctx = f->ctx; f->ctx = ctx; return ret; }
static int filter_lookup(ap_filter_t *f, ap_filter_rec_t *filter) { ap_filter_provider_t *provider; int match = 0; const char *err = NULL; request_rec *r = f->r; harness_ctx *ctx = f->ctx; provider_ctx *pctx; #ifndef NO_PROTOCOL unsigned int proto_flags; mod_filter_ctx *rctx = ap_get_module_config(r->request_config, &filter_module); #endif /* Check registered providers in order */ for (provider = filter->providers; provider; provider = provider->next) { if (provider->expr) { match = ap_expr_exec(r, provider->expr, &err); if (err) { ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01379) "Error evaluating filter dispatch condition: %s", err); match = 0; } ap_log_rerror(APLOG_MARK, APLOG_TRACE2, 0, r, "Expression condition for '%s' %s", provider->frec->name, match ? "matched" : "did not match"); } else if (r->content_type) { const char **type = provider->types; size_t len = strcspn(r->content_type, "; \t"); AP_DEBUG_ASSERT(type != NULL); ap_log_rerror(APLOG_MARK, APLOG_TRACE4, 0, r, "Content-Type '%s' ...", r->content_type); while (*type) { /* Handle 'content-type;charset=...' correctly */ if (strncmp(*type, r->content_type, len) == 0 && (*type)[len] == '\0') { ap_log_rerror(APLOG_MARK, APLOG_TRACE4, 0, r, "... matched '%s'", *type); match = 1; break; } else { ap_log_rerror(APLOG_MARK, APLOG_TRACE4, 0, r, "... did not match '%s'", *type); } type++; } ap_log_rerror(APLOG_MARK, APLOG_TRACE2, 0, r, "Content-Type condition for '%s' %s", provider->frec->name, match ? "matched" : "did not match"); } else { ap_log_rerror(APLOG_MARK, APLOG_TRACE2, 0, r, "Content-Type condition for '%s' did not match: " "no Content-Type", provider->frec->name); } if (match) { /* condition matches this provider */ #ifndef NO_PROTOCOL /* check protocol * * FIXME: * This is a quick hack and almost certainly buggy. * The idea is that by putting this in mod_filter, we relieve * filter implementations of the burden of fixing up HTTP headers * for cases that are routinely affected by filters. * * Default is ALWAYS to do nothing, so as not to tread on the * toes of filters which want to do it themselves. * */ proto_flags = provider->frec->proto_flags; /* some specific things can't happen in a proxy */ if (r->proxyreq) { if (proto_flags & AP_FILTER_PROTO_NO_PROXY) { /* can't use this provider; try next */ continue; } if (proto_flags & AP_FILTER_PROTO_TRANSFORM) { const char *str = apr_table_get(r->headers_out, "Cache-Control"); if (str) { if (ap_strcasestr(str, "no-transform")) { /* can't use this provider; try next */ continue; } } apr_table_addn(r->headers_out, "Warning", apr_psprintf(r->pool, "214 %s Transformation applied", r->hostname)); } } /* things that are invalidated if the filter transforms content */ if (proto_flags & AP_FILTER_PROTO_CHANGE) { apr_table_unset(r->headers_out, "Content-MD5"); apr_table_unset(r->headers_out, "ETag"); if (proto_flags & AP_FILTER_PROTO_CHANGE_LENGTH) { apr_table_unset(r->headers_out, "Content-Length"); } } /* no-cache is for a filter that has different effect per-hit */ if (proto_flags & AP_FILTER_PROTO_NO_CACHE) { apr_table_unset(r->headers_out, "Last-Modified"); apr_table_addn(r->headers_out, "Cache-Control", "no-cache"); } if (proto_flags & AP_FILTER_PROTO_NO_BYTERANGE) { apr_table_setn(r->headers_out, "Accept-Ranges", "none"); } else if (rctx && rctx->range) { /* restore range header we saved earlier */ apr_table_setn(r->headers_in, "Range", rctx->range); rctx->range = NULL; } #endif for (pctx = ctx->init_ctx; pctx; pctx = pctx->next) { if (pctx->provider == provider) { ctx->fctx = pctx->ctx ; } } ctx->func = provider->frec->filter_func.out_func; return 1; } } /* No provider matched */ return 0; }