/**
 * Drive the HTTP message parser state machine over the data in `buffer`.
 *
 * Each loop iteration pops the current state from the parser's state stack,
 * handles it, and pushes the follow-up state(s). Consumed input is cut out of
 * `buffer`. Body states (BODY_DUMB, BODY_LENGTH, BODY_CHUNKED, BODY_DONE) do
 * not write the body themselves: they stage a slice in the locals
 * `str`/`len`/`cut` and push the BODY state, which writes it to the message
 * body stream on the next iteration (inflating first if an inflate stream was
 * set up while processing the headers).
 *
 * @param parser  parser whose state stack and header/dechunk/inflate
 *                sub-parsers are used and updated
 * @param buffer  input buffer; processed bytes are removed from its front
 * @param flags   bitmask of PHP_HTTP_MESSAGE_PARSER_* flags (CLEANUP,
 *                EMPTY_REDIRECTS, DUMB_BODIES, GREEDY are evaluated here)
 * @param message in/out pointer to the message being populated
 * @return PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE on error,
 *         PHP_HTTP_MESSAGE_PARSER_STATE_DONE once a message is complete and
 *         GREEDY is not set, otherwise the state on top of the stack when the
 *         loop ran out of data
 */
php_http_message_parser_state_t php_http_message_parser_parse(php_http_message_parser_t *parser, php_http_buffer_t *buffer, unsigned flags, php_http_message_t **message)
{
	/* body slice staged by a BODY_* state for the following BODY state */
	char *str = NULL;
	size_t len = 0;
	/* number of bytes to drop from the front of buffer once the slice is written */
	size_t cut = 0;

	while (buffer->used || !php_http_message_parser_states[php_http_message_parser_state_is(parser)].need_data) {
#if DBG_PARSER
		fprintf(stderr, "#MP: %s (f: %u, t:%d, l:%zu)\n",
			php_http_message_parser_state_name(php_http_message_parser_state_is(parser)),
			flags,
			message && *message ? (*message)->type : -1,
			buffer->used
		);
		_dpf(0, buffer->data, buffer->used);
#endif

		switch (php_http_message_parser_state_pop(parser))
		{
			case PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE:
				/* stay in the failure state */
				return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);

			case PHP_HTTP_MESSAGE_PARSER_STATE_START:
			{
				char *ptr = buffer->data;

				/* skip leading whitespace */
				while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) {
					++ptr;
				}

				php_http_buffer_cut(buffer, 0, ptr - buffer->data);

				if (buffer->used) {
					php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER);
				}
				break;
			}

			case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER:
			{
				unsigned header_parser_flags = (flags & PHP_HTTP_MESSAGE_PARSER_CLEANUP) ? PHP_HTTP_HEADER_PARSER_CLEANUP : 0;

				switch (php_http_header_parser_parse(&parser->header, buffer, header_parser_flags, *message ? &(*message)->hdrs : NULL, (php_http_info_callback_t) php_http_message_info_callback, message)) {
					case PHP_HTTP_HEADER_PARSER_STATE_FAILURE:
						/* NOTE(review): unlike the other failure exits, no FAILURE state is
						 * pushed here; kept as-is because callers may rely on it */
						return PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE;

					case PHP_HTTP_HEADER_PARSER_STATE_DONE:
						php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE);
						break;

					default:
						/* headers incomplete: wait for more data, unless we are
						 * cleaning up with an exhausted buffer */
						if (buffer->used || !(flags & PHP_HTTP_MESSAGE_PARSER_CLEANUP)) {
							return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER);
						} else {
							php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE);
						}
				}
				break;
			}

			case PHP_HTTP_MESSAGE_PARSER_STATE_HEADER_DONE:
			{
				zval h, *h_ptr, *h_loc = NULL, *h_con = NULL, *h_ce;
				zend_bool chunked = 0;
				zend_long content_length = -1;
				/* this case holds its own reference to the string (it is released
				 * on the range paths below), so every exit must release it */
				zend_string *content_range = NULL;

				/* Content-Range has higher precedence than Content-Length,
				 * and content-length denotes the original length of the entity,
				 * so let's *NOT* remove CR/CL, because that would fundamentally
				 * change the meaning of the whole message
				 */
				if ((h_ptr = php_http_message_header(*message, ZEND_STRL("Transfer-Encoding")))) {
					zend_string *zs = zval_get_string(h_ptr);

					chunked = zend_string_equals_literal(zs, "chunked");
					zend_string_release(zs);

					/* keep the original value around under an X-Original-* name */
					Z_TRY_ADDREF_P(h_ptr);
					zend_hash_str_update(&(*message)->hdrs, "X-Original-Transfer-Encoding", lenof("X-Original-Transfer-Encoding"), h_ptr);
					zend_hash_str_del(&(*message)->hdrs, "Transfer-Encoding", lenof("Transfer-Encoding"));

					/* reset */
					ZVAL_LONG(&h, 0);
					zend_hash_str_update(&(*message)->hdrs, "Content-Length", lenof("Content-Length"), &h);
				} else if ((h_ptr = php_http_message_header(*message, ZEND_STRL("Content-Length")))) {
					content_length = zval_get_long(h_ptr);
					Z_TRY_ADDREF_P(h_ptr);
					zend_hash_str_update(&(*message)->hdrs, "X-Original-Content-Length", lenof("X-Original-Content-Length"), h_ptr);
				}

				if ((content_range = php_http_message_header_string(*message, ZEND_STRL("Content-Range")))) {
					/* the header table gets its own reference */
					ZVAL_STR_COPY(&h, content_range);
					zend_hash_str_update(&(*message)->hdrs, "Content-Range", lenof("Content-Range"), &h);
				}

				/* so, if curl sees a 3xx code, a Location header and a Connection:close header
				 * it decides not to read the response body.
				 */
				if ((flags & PHP_HTTP_MESSAGE_PARSER_EMPTY_REDIRECTS)
				&&	(*message)->type == PHP_HTTP_RESPONSE
				&&	(*message)->http.info.response.code/100 == 3
				&&	(h_loc = php_http_message_header(*message, ZEND_STRL("Location")))
				&&	(h_con = php_http_message_header(*message, ZEND_STRL("Connection")))
				) {
					zend_string *con = zval_get_string(h_con);

					if (php_http_match(con->val, "close", PHP_HTTP_MATCH_WORD)) {
						zend_string_release(con);
						/* do not leak our Content-Range reference on this early exit */
						if (content_range) {
							zend_string_release(content_range);
						}
						php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
						break;
					}
					zend_string_release(con);
				}

				if ((h_ce = php_http_message_header(*message, ZEND_STRL("Content-Encoding")))) {
					zend_string *ce = zval_get_string(h_ce);

					if (php_http_match(ce->val, "gzip", PHP_HTTP_MATCH_WORD)
					||	php_http_match(ce->val, "x-gzip", PHP_HTTP_MATCH_WORD)
					||	php_http_match(ce->val, "deflate", PHP_HTTP_MATCH_WORD)
					) {
						/* (re)arm the inflate stream used by the BODY state */
						if (parser->inflate) {
							php_http_encoding_stream_reset(&parser->inflate);
						} else {
							parser->inflate = php_http_encoding_stream_init(NULL, php_http_encoding_stream_get_inflate_ops(), 0);
						}
						Z_TRY_ADDREF_P(h_ce);
						zend_hash_str_update(&(*message)->hdrs, "X-Original-Content-Encoding", lenof("X-Original-Content-Encoding"), h_ce);
						zend_hash_str_del(&(*message)->hdrs, "Content-Encoding", lenof("Content-Encoding"));
					}
					zend_string_release(ce);
				}

				if ((flags & PHP_HTTP_MESSAGE_PARSER_DUMB_BODIES)) {
					/* Content-Range is not evaluated for dumb bodies */
					if (content_range) {
						zend_string_release(content_range);
					}
					php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB);
				} else {
					if (chunked) {
						/* chunked encoding wins over Content-Range/Content-Length */
						if (content_range) {
							zend_string_release(content_range);
						}
						parser->dechunk = php_http_encoding_stream_init(parser->dechunk, php_http_encoding_stream_get_dechunk_ops(), 0);
						php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED);
						break;
					}

					if (content_range) {
						ulong total = 0, start = 0, end = 0;

						if (!strncasecmp(content_range->val, "bytes", lenof("bytes"))
						&& (	content_range->val[lenof("bytes")] == ':'
							||	content_range->val[lenof("bytes")] == ' '
							||	content_range->val[lenof("bytes")] == '='
							)
						) {
							char *total_at = NULL, *end_at = NULL;
							/* sizeof() counts the NUL, i.e. this skips "bytes" plus the separator */
							char *start_at = content_range->val + sizeof("bytes");

							start = strtoul(start_at, &end_at, 10);
							/* strtoul() always sets end_at; only step over a separator
							 * if we are not already at the terminating NUL */
							if (end_at && *end_at) {
								end = strtoul(end_at + 1, &total_at, 10);
								if (total_at && *total_at && strncmp(total_at + 1, "*", 1)) {
									total = strtoul(total_at + 1, NULL, 10);
								}

								if (end >= start && (!total || end <= total)) {
									parser->body_length = end + 1 - start;
									php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
									zend_string_release(content_range);
									break;
								}
							}
						}

						/* unusable range: fall back to Content-Length handling */
						zend_string_release(content_range);
					}

					if (content_length >= 0) {
						parser->body_length = content_length;
						php_http_message_parser_state_push(parser, 1, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH);
						break;
					}

					/* no length information at all */
					if ((*message)->type == PHP_HTTP_REQUEST) {
						php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);
					} else {
						php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB);
					}
				}
				break;
			}

			case PHP_HTTP_MESSAGE_PARSER_STATE_BODY:
			{
				/* write the slice staged by the preceding BODY_* state */
				if (len) {
					if (parser->inflate) {
						char *dec_str = NULL;
						size_t dec_len;

						if (SUCCESS != php_http_encoding_stream_update(parser->inflate, str, len, &dec_str, &dec_len)) {
							/* str may be heap-owned dechunked data; do not leak it on failure */
							if (str != buffer->data) {
								PTR_FREE(str);
							}
							return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
						}

						if (str != buffer->data) {
							PTR_FREE(str);
						}
						str = dec_str;
						len = dec_len;
					}

					php_stream_write(php_http_message_body_stream((*message)->body), str, len);
				}

				if (cut) {
					php_http_buffer_cut(buffer, 0, cut);
				}

				/* anything that is not a view into the buffer was allocated for us */
				if (str != buffer->data) {
					PTR_FREE(str);
				}

				str = NULL;
				len = 0;
				cut = 0;
				break;
			}

			case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DUMB:
			{
				/* take everything that is buffered as body */
				str = buffer->data;
				len = buffer->used;
				cut = len;

				php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
				break;
			}

			case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH:
			{
				/* take at most the number of bytes still outstanding */
				len = MIN(parser->body_length, buffer->used);
				str = buffer->data;
				cut = len;

				parser->body_length -= len;

				php_http_message_parser_state_push(parser, 2, !parser->body_length?PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:PHP_HTTP_MESSAGE_PARSER_STATE_BODY_LENGTH, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
				break;
			}

			case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED:
			{
				/*
				 * - pass available data through the dechunk stream
				 * - pass decoded data along
				 * - if stream zeroed:
				 * 	Y:	- cut processed string out of buffer, but leave length of unprocessed dechunk stream data untouched
				 * 		- body done
				 * 	N:	- parse ahead
				 */
				char *dec_str = NULL;
				size_t dec_len;

				if (SUCCESS != php_http_encoding_stream_update(parser->dechunk, buffer->data, buffer->used, &dec_str, &dec_len)) {
					/* NOTE(review): unlike the inflate/finish failure exits, no FAILURE
					 * state is pushed here; kept as-is because callers may rely on it */
					return PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE;
				}

				str = dec_str;
				len = dec_len;

				if (php_http_encoding_stream_done(parser->dechunk)) {
					cut = buffer->used - PHP_HTTP_BUFFER(parser->dechunk->ctx)->used;
					php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
				} else {
					cut = buffer->used;
					php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_BODY_CHUNKED, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
				}
				break;
			}

			case PHP_HTTP_MESSAGE_PARSER_STATE_BODY_DONE:
			{
				php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_DONE);

				if (parser->dechunk && parser->dechunk->ctx) {
					char *dec_str = NULL;
					size_t dec_len;

					if (SUCCESS != php_http_encoding_stream_finish(parser->dechunk, &dec_str, &dec_len)) {
						return php_http_message_parser_state_push(parser, 1, PHP_HTTP_MESSAGE_PARSER_STATE_FAILURE);
					}
					php_http_encoding_stream_dtor(parser->dechunk);

					/* data left over in the dechunk stream: write it, then fix up Content-Length */
					if (dec_str && dec_len) {
						str = dec_str;
						len = dec_len;
						cut = 0;
						php_http_message_parser_state_push(parser, 2, PHP_HTTP_MESSAGE_PARSER_STATE_UPDATE_CL, PHP_HTTP_MESSAGE_PARSER_STATE_BODY);
					}
				}

				break;
			}

			case PHP_HTTP_MESSAGE_PARSER_STATE_UPDATE_CL:
			{
				zval zcl;

				/* make Content-Length reflect the size of the body */
				ZVAL_LONG(&zcl, php_http_message_body_size((*message)->body));
				zend_hash_str_update(&(*message)->hdrs, "Content-Length", lenof("Content-Length"), &zcl);
				break;
			}

			case PHP_HTTP_MESSAGE_PARSER_STATE_DONE:
			{
				char *ptr = buffer->data;

				/* skip whitespace trailing the message */
				while (ptr - buffer->data < buffer->used && PHP_HTTP_IS_CTYPE(space, *ptr)) {
					++ptr;
				}

				php_http_buffer_cut(buffer, 0, ptr - buffer->data);

				/* only a greedy parser keeps looping after a complete message */
				if (!(flags & PHP_HTTP_MESSAGE_PARSER_GREEDY)) {
					return PHP_HTTP_MESSAGE_PARSER_STATE_DONE;
				}
				break;
			}
		}
	}

	return php_http_message_parser_state_is(parser);
}
/* Example #2 (0) */
static PHP_HTTP_FILTER_FUNCTION(chunked_decode)
{
	int out_avail = 0;
	php_stream_bucket *ptr, *nxt;
	PHP_HTTP_FILTER_BUFFER(chunked_decode) *buffer = Z_PTR(this->abstract);
	
	if (bytes_consumed) {
		*bytes_consumed = 0;
	}
	
	/* fetch available bucket data */
	for (ptr = buckets_in->head; ptr; ptr = nxt) {
		if (bytes_consumed) {
			*bytes_consumed += ptr->buflen;
		}

		if (PHP_HTTP_BUFFER_NOMEM == php_http_buffer_append(PHP_HTTP_BUFFER(buffer), ptr->buf, ptr->buflen)) {
			return PSFS_ERR_FATAL;
		}

		nxt = ptr->next;
		php_stream_bucket_unlink(ptr);
		php_stream_bucket_delref(ptr);
	}
	
	if (!php_http_buffer_fix(PHP_HTTP_BUFFER(buffer))) {
		return PSFS_ERR_FATAL;
	}

	/* we have data in our buffer */
	while (PHP_HTTP_BUFFER(buffer)->used) {
	
		/* we already know the size of the chunk and are waiting for data */
		if (buffer->hexlen) {
		
			/* not enough data buffered */
			if (PHP_HTTP_BUFFER(buffer)->used < buffer->hexlen) {
			
				/* flush anyway? */
				if (flags & PSFS_FLAG_FLUSH_INC) {
				
					/* flush all data (should only be chunk data) */
					out_avail = 1;
					NEW_BUCKET(PHP_HTTP_BUFFER(buffer)->data, PHP_HTTP_BUFFER(buffer)->used);
					
					/* waiting for less data now */
					buffer->hexlen -= PHP_HTTP_BUFFER(buffer)->used;
					/* no more buffered data */
					php_http_buffer_reset(PHP_HTTP_BUFFER(buffer));
					/* break */
				} 
				
				/* we have too less data and don't need to flush */
				else {
					break;
				}
			} 
			
			/* we seem to have all data of the chunk */
			else {
				out_avail = 1;
				NEW_BUCKET(PHP_HTTP_BUFFER(buffer)->data, buffer->hexlen);
				
				/* remove outgoing data from the buffer */
				php_http_buffer_cut(PHP_HTTP_BUFFER(buffer), 0, buffer->hexlen);
				/* reset hexlen */
				buffer->hexlen = 0;
				/* continue */
			}
		} 
		
		/* we don't know the length of the chunk yet */
		else {
			size_t off = 0;
			
			/* ignore preceeding CRLFs (too loose?) */
			while (off < PHP_HTTP_BUFFER(buffer)->used && (
					PHP_HTTP_BUFFER(buffer)->data[off] == '\n' || 
					PHP_HTTP_BUFFER(buffer)->data[off] == '\r')) {
				++off;
			}
			if (off) {
				php_http_buffer_cut(PHP_HTTP_BUFFER(buffer), 0, off);
			}
			
			/* still data there? */
			if (PHP_HTTP_BUFFER(buffer)->used) {
				int eollen;
				const char *eolstr;
				
				/* we need eol, so we can be sure we have all hex digits */
				php_http_buffer_fix(PHP_HTTP_BUFFER(buffer));
				if ((eolstr = php_http_locate_bin_eol(PHP_HTTP_BUFFER(buffer)->data, PHP_HTTP_BUFFER(buffer)->used, &eollen))) {
					char *stop = NULL;
					
					/* read in chunk size */
					buffer->hexlen = strtoul(PHP_HTTP_BUFFER(buffer)->data, &stop, 16);
					
					/*	if strtoul() stops at the beginning of the buffered data
						there's something oddly wrong, i.e. bad input */
					if (stop == PHP_HTTP_BUFFER(buffer)->data) {
						return PSFS_ERR_FATAL;
					}
					
					/* cut out <chunk size hex><chunk extension><eol> */
					php_http_buffer_cut(PHP_HTTP_BUFFER(buffer), 0, eolstr + eollen - PHP_HTTP_BUFFER(buffer)->data);
					/* buffer->hexlen is 0 now or contains the size of the next chunk */
					if (!buffer->hexlen) {
						php_stream_notify_info(PHP_STREAM_CONTEXT(stream), PHP_STREAM_NOTIFY_COMPLETED, NULL, 0);
						break;
					}
					/* continue */
				} else {
					/* we have not enough data buffered to read in chunk size */
					break;
				}
			}
			/* break */
		}
	}
	
	/* flush before close, but only if we are already waiting for more data */
	if (PHP_HTTP_FILTER_IS_CLOSING(stream, flags) && buffer->hexlen && PHP_HTTP_BUFFER(buffer)->used) {
		out_avail = 1;
		NEW_BUCKET(PHP_HTTP_BUFFER(buffer)->data, PHP_HTTP_BUFFER(buffer)->used);
		php_http_buffer_reset(PHP_HTTP_BUFFER(buffer));
		buffer->hexlen = 0;
	}
	
	return out_avail ? PSFS_PASS_ON : PSFS_FEED_ME;
}