/* xlate_out_filter() handles (almost) arbitrary conversions from one charset * to another... * translation is determined in the fixup hook (find_code_page), which is * where the filter's context data is set up... the context data gives us * the translation handle */ static apr_status_t xlate_out_filter(ap_filter_t *f, apr_bucket_brigade *bb) { charset_req_t *reqinfo = ap_get_module_config(f->r->request_config, &charset_lite_module); charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config, &charset_lite_module); charset_filter_ctx_t *ctx = f->ctx; apr_bucket *dptr, *consumed_bucket; const char *cur_str; apr_size_t cur_len, cur_avail; char tmp[OUTPUT_XLATE_BUF_SIZE]; apr_size_t space_avail; int done; apr_status_t rv = APR_SUCCESS; if (!ctx) { /* this is SetOutputFilter path; grab the preallocated context, * if any; note that if we decided not to do anything in an earlier * handler, we won't even have a reqinfo */ if (reqinfo) { ctx = f->ctx = reqinfo->output_ctx; reqinfo->output_ctx = NULL; /* prevent SNAFU if user coded us twice * in the filter chain; we can't have two * instances using the same context */ } if (!ctx) { /* no idea how to translate; don't do anything */ ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t)); ctx->dc = dc; ctx->noop = 1; } } /* Check the mime type to see if translation should be performed. */ if (!ctx->noop && ctx->xlate == NULL) { const char *mime_type = f->r->content_type; if (mime_type && (strncasecmp(mime_type, "text/", 5) == 0 || #if APR_CHARSET_EBCDIC /* On an EBCDIC machine, be willing to translate mod_autoindex- * generated output. Otherwise, it doesn't look too cool. * * XXX This isn't a perfect fix because this doesn't trigger us * to convert from the charset of the source code to ASCII. The * general solution seems to be to allow a generator to set an * indicator in the r specifying that the body is coded in the * implementation character set (i.e., the charset of the source * code). This would get several different types of documents * translated properly: mod_autoindex output, mod_status output, * mod_info output, hard-coded error documents, etc. */ strcmp(mime_type, DIR_MAGIC_TYPE) == 0 || #endif strncasecmp(mime_type, "message/", 8) == 0 || dc->force_xlate == FX_FORCE)) { rv = apr_xlate_open(&ctx->xlate, dc->charset_default, dc->charset_source, f->r->pool); if (rv != APR_SUCCESS) { ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, APLOGNO(01453) "can't open translation %s->%s", dc->charset_source, dc->charset_default); ctx->noop = 1; } else { if (apr_xlate_sb_get(ctx->xlate, &ctx->is_sb) != APR_SUCCESS) { ctx->is_sb = 0; } } } else { ctx->noop = 1; if (mime_type) { ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, f->r, "mime type is %s; no translation selected", mime_type); } } } ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, f->r, "xlate_out_filter() - " "charset_source: %s charset_default: %s", dc && dc->charset_source ? dc->charset_source : "(none)", dc && dc->charset_default ? dc->charset_default : "(none)"); if (!ctx->ran) { /* filter never ran before */ chk_filter_chain(f); ctx->ran = 1; if (!ctx->noop && !ctx->is_sb) { /* We're not converting between two single-byte charsets, so unset * Content-Length since it is unlikely to remain the same. */ apr_table_unset(f->r->headers_out, "Content-Length"); } } if (ctx->noop) { return ap_pass_brigade(f->next, bb); } dptr = APR_BRIGADE_FIRST(bb); done = 0; cur_len = 0; space_avail = sizeof(tmp); consumed_bucket = NULL; while (!done) { if (!cur_len) { /* no bytes left to process in the current bucket... */ if (consumed_bucket) { apr_bucket_delete(consumed_bucket); consumed_bucket = NULL; } if (dptr == APR_BRIGADE_SENTINEL(bb)) { break; } if (APR_BUCKET_IS_EOS(dptr)) { cur_len = -1; /* XXX yuck, but that tells us to send * eos down; when we minimize our bb construction * we'll fix this crap */ if (ctx->saved) { /* Oops... we have a partial char from the previous bucket * that won't be completed because there's no more data. */ rv = APR_INCOMPLETE; ctx->ees = EES_INCOMPLETE_CHAR; } break; } if (APR_BUCKET_IS_METADATA(dptr)) { apr_bucket *metadata_bucket; metadata_bucket = dptr; dptr = APR_BUCKET_NEXT(dptr); APR_BUCKET_REMOVE(metadata_bucket); rv = send_bucket_downstream(f, metadata_bucket); if (rv != APR_SUCCESS) { done = 1; } continue; } rv = apr_bucket_read(dptr, &cur_str, &cur_len, APR_BLOCK_READ); if (rv != APR_SUCCESS) { ctx->ees = EES_BUCKET_READ; break; } consumed_bucket = dptr; /* for axing when we're done reading it */ dptr = APR_BUCKET_NEXT(dptr); /* get ready for when we access the * next bucket */ } /* Try to fill up our tmp buffer with translated data. */ cur_avail = cur_len; if (cur_len) { /* maybe we just hit the end of a pipe (len = 0) ? */ if (ctx->saved) { /* Rats... we need to finish a partial character from the previous * bucket. */ char *tmp_tmp; tmp_tmp = tmp + sizeof(tmp) - space_avail; rv = finish_partial_char(ctx, &cur_str, &cur_len, &tmp_tmp, &space_avail); } else { rv = apr_xlate_conv_buffer(ctx->xlate, cur_str, &cur_avail, tmp + sizeof(tmp) - space_avail, &space_avail); /* Update input ptr and len after consuming some bytes */ cur_str += cur_len - cur_avail; cur_len = cur_avail; if (rv == APR_INCOMPLETE) { /* partial character at end of input */ /* We need to save the final byte(s) for next time; we can't * convert it until we look at the next bucket. */ rv = set_aside_partial_char(ctx, cur_str, cur_len); cur_len = 0; } } } if (rv != APR_SUCCESS) { /* bad input byte or partial char too big to store */ done = 1; } if (space_avail < XLATE_MIN_BUFF_LEFT) { /* It is time to flush, as there is not enough space left in the * current output buffer to bother with converting more data. */ rv = send_downstream(f, tmp, sizeof(tmp) - space_avail); if (rv != APR_SUCCESS) { done = 1; } /* tmp is now empty */ space_avail = sizeof(tmp); } } if (rv == APR_SUCCESS) { if (space_avail < sizeof(tmp)) { /* gotta write out what we converted */ rv = send_downstream(f, tmp, sizeof(tmp) - space_avail); } } if (rv == APR_SUCCESS) { if (cur_len == -1) { rv = send_eos(f); } } else { log_xlate_error(f, rv); } return rv; }
static apr_status_t xlate_in_filter(ap_filter_t *f, apr_bucket_brigade *bb, ap_input_mode_t mode, apr_read_type_e block, apr_off_t readbytes) { apr_status_t rv; charset_req_t *reqinfo = ap_get_module_config(f->r->request_config, &charset_lite_module); charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config, &charset_lite_module); charset_filter_ctx_t *ctx = f->ctx; apr_size_t buffer_size; int hit_eos; if (!ctx) { /* this is SetInputFilter path; grab the preallocated context, * if any; note that if we decided not to do anything in an earlier * handler, we won't even have a reqinfo */ if (reqinfo) { ctx = f->ctx = reqinfo->input_ctx; reqinfo->input_ctx = NULL; /* prevent SNAFU if user coded us twice * in the filter chain; we can't have two * instances using the same context */ } if (!ctx) { /* no idea how to translate; don't do anything */ ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t)); ctx->dc = dc; ctx->noop = 1; } } ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, f->r, "xlate_in_filter() - " "charset_source: %s charset_default: %s", dc && dc->charset_source ? dc->charset_source : "(none)", dc && dc->charset_default ? dc->charset_default : "(none)"); if (!ctx->ran) { /* filter never ran before */ chk_filter_chain(f); ctx->ran = 1; if (!ctx->noop && !ctx->is_sb && apr_table_get(f->r->headers_in, "Content-Length")) { /* A Content-Length header is present, but it won't be valid after * conversion because we're not converting between two single-byte * charsets. This will affect most CGI scripts and may affect * some modules. * Content-Length can't be unset here because that would break * being able to read the request body. * Processing of chunked request bodies is not impacted by this * filter since the the length was not declared anyway. */ ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, f->r, "Request body length may change, resulting in " "misprocessing by some modules or scripts"); } } if (ctx->noop) { return ap_get_brigade(f->next, bb, mode, block, readbytes); } if (APR_BRIGADE_EMPTY(ctx->bb)) { if ((rv = ap_get_brigade(f->next, bb, mode, block, readbytes)) != APR_SUCCESS) { return rv; } } else { APR_BRIGADE_PREPEND(bb, ctx->bb); /* first use the leftovers */ } buffer_size = INPUT_XLATE_BUF_SIZE; rv = xlate_brigade(ctx, bb, ctx->tmp, &buffer_size, &hit_eos); if (rv == APR_SUCCESS) { if (!hit_eos) { /* move anything leftover into our context for next time; * we don't currently "set aside" since the data came from * down below, but I suspect that for long-term we need to * do that */ APR_BRIGADE_CONCAT(ctx->bb, bb); } if (buffer_size < INPUT_XLATE_BUF_SIZE) { /* do we have output? */ apr_bucket *e; e = apr_bucket_heap_create(ctx->tmp, INPUT_XLATE_BUF_SIZE - buffer_size, NULL, f->r->connection->bucket_alloc); /* make sure we insert at the head, because there may be * an eos bucket already there, and the eos bucket should * come after the data */ APR_BRIGADE_INSERT_HEAD(bb, e); } else { /* XXX need to get some more data... what if the last brigade * we got had only the first byte of a multibyte char? we need * to grab more data from the network instead of returning an * empty brigade */ } /* If we have any metadata at the head of ctx->bb, go ahead and move it * onto the end of bb to be returned to our caller. */ if (!APR_BRIGADE_EMPTY(ctx->bb)) { apr_bucket *b = APR_BRIGADE_FIRST(ctx->bb); while (b != APR_BRIGADE_SENTINEL(ctx->bb) && APR_BUCKET_IS_METADATA(b)) { APR_BUCKET_REMOVE(b); APR_BRIGADE_INSERT_TAIL(bb, b); b = APR_BRIGADE_FIRST(ctx->bb); } } } else { log_xlate_error(f, rv); } return rv; }
static int xlate_in_filter(ap_filter_t *f, apr_bucket_brigade *bb, ap_input_mode_t mode, apr_read_type_e block, apr_off_t readbytes) { apr_status_t rv; charset_req_t *reqinfo = ap_get_module_config(f->r->request_config, &charset_lite_module); charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config, &charset_lite_module); charset_filter_ctx_t *ctx = f->ctx; apr_size_t buffer_size; int hit_eos; if (!ctx) { /* this is SetInputFilter path; grab the preallocated context, * if any; note that if we decided not to do anything in an earlier * handler, we won't even have a reqinfo */ if (reqinfo) { ctx = f->ctx = reqinfo->input_ctx; reqinfo->input_ctx = NULL; /* prevent SNAFU if user coded us twice * in the filter chain; we can't have two * instances using the same context */ } if (!ctx) { /* no idea how to translate; don't do anything */ ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t)); ctx->dc = dc; ctx->noop = 1; } } if (dc->debug >= DBGLVL_GORY) { ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, "xlate_in_filter() - " "charset_source: %s charset_default: %s", dc && dc->charset_source ? dc->charset_source : "(none)", dc && dc->charset_default ? dc->charset_default : "(none)"); } if (!ctx->ran) { /* filter never ran before */ chk_filter_chain(f); ctx->ran = 1; } if (ctx->noop) { return ap_get_brigade(f->next, bb, mode, block, readbytes); } if (APR_BRIGADE_EMPTY(ctx->bb)) { if ((rv = ap_get_brigade(f->next, bb, mode, block, readbytes)) != APR_SUCCESS) { return rv; } } else { APR_BRIGADE_PREPEND(bb, ctx->bb); /* first use the leftovers */ } buffer_size = INPUT_XLATE_BUF_SIZE; rv = xlate_brigade(ctx, bb, ctx->tmp, &buffer_size, &hit_eos); if (rv == APR_SUCCESS) { if (!hit_eos) { /* move anything leftover into our context for next time; * we don't currently "set aside" since the data came from * down below, but I suspect that for long-term we need to * do that */ APR_BRIGADE_CONCAT(ctx->bb, bb); } if (buffer_size < INPUT_XLATE_BUF_SIZE) { /* do we have output? */ apr_bucket *e; e = apr_bucket_heap_create(ctx->tmp, INPUT_XLATE_BUF_SIZE - buffer_size, NULL, f->r->connection->bucket_alloc); /* make sure we insert at the head, because there may be * an eos bucket already there, and the eos bucket should * come after the data */ APR_BRIGADE_INSERT_HEAD(bb, e); } else { /* XXX need to get some more data... what if the last brigade * we got had only the first byte of a multibyte char? we need * to grab more data from the network instead of returning an * empty brigade */ } } else { log_xlate_error(f, rv); } return rv; }