/**
 * Applies the configured case transformation to a chunk of bytes.
 *
 * \param input the bytes to transform
 *
 * \return a new buffer holding \p input lower-cased (LOWER), upper-cased
 *         (UPPER), or appended unmodified (IDENTITY and any other mode).
 */
Buffer ExampleFilter::process(const BufferRef& input) {
  Buffer result;

  switch (mode_) {
    case ExampleFilter::LOWER:
      for (auto i = input.cbegin(), e = input.cend(); i != e; ++i) {
        // <cctype> functions are only defined for values representable as
        // unsigned char (or EOF), so never hand them a possibly-negative char.
        result.push_back(static_cast<char>(
            std::tolower(static_cast<unsigned char>(*i))));
      }
      break;
    case ExampleFilter::UPPER:
      for (auto i = input.cbegin(), e = input.cend(); i != e; ++i) {
        result.push_back(static_cast<char>(
            std::toupper(static_cast<unsigned char>(*i))));
      }
      break;
    case ExampleFilter::IDENTITY:
    default:
      result.push_back(input);
  }

  return result;
}
/** processes a message-chunk. * * \param chunk the chunk of bytes to process * * \return number of bytes actually parsed and processed */ std::size_t HttpMessageProcessor::process(const BufferRef& chunk, size_t* out_nparsed) { /* * CR = 0x0D * LF = 0x0A * SP = 0x20 * HT = 0x09 * * CRLF = CR LF * LWS = [CRLF] 1*( SP | HT ) * * HTTP-message = Request | Response * * generic-message = start-line * *(message-header CRLF) * CRLF * [ message-body ] * * start-line = Request-Line | Status-Line * * Request-Line = Method SP Request-URI SP HTTP-Version CRLF * * Method = "OPTIONS" | "GET" | "HEAD" * | "POST" | "PUT" | "DELETE" * | "TRACE" | "CONNECT" * | extension-method * * Request-URI = "*" | absoluteURI | abs_path | authority * * extension-method = token * * Status-Line = HTTP-Version SP Status-Code SP Reason-Phrase CRLF * * HTTP-Version = "HTTP" "/" 1*DIGIT "." 1*DIGIT * Status-Code = 3*DIGIT * Reason-Phrase = *<TEXT, excluding CR, LF> * * absoluteURI = "http://" [user ':'******'@'] hostname [abs_path] [qury] * abs_path = "/" *CHAR * authority = ... * token = 1*<any CHAR except CTLs or seperators> * separator = "(" | ")" | "<" | ">" | "@" * | "," | ";" | ":" | "\" | <"> * | "/" | "[" | "]" | "?" | "=" * | "{" | "}" | SP | HT * * message-header = field-name ":" [ field-value ] * field-name = token * field-value = *( field-content | LWS ) * field-content = <the OCTETs making up the field-value * and consisting of either *TEXT or combinations * of token, separators, and quoted-string> * * message-body = entity-body * | <entity-body encoded as per Transfer-Encoding> */ const char* i = chunk.cbegin(); const char* e = chunk.cend(); const size_t initialOutOffset = out_nparsed ? *out_nparsed : 0; size_t result = initialOutOffset; size_t* nparsed = out_nparsed ? 
out_nparsed : &result; //TRACE(2, "process(curState:%s): size: %ld: '%s'", state_str(), chunk.size(), chunk.str().c_str()); TRACE(2, "process(curState:%s): size: %ld", state_str(), chunk.size()); #if 0 switch (state_) { case CONTENT: // fixed size content if (!passContent(chunk, nparsed)) goto done; i += *nparsed; break; case CONTENT_ENDLESS: // endless-sized content (until stream end) { *nparsed += chunk.size(); bool rv = filters_.empty() ? onMessageContent(chunk) : onMessageContent(filters_.process(chunk)); goto done; } default: break; } #endif while (i != e) { #if !defined(XZERO_NDEBUG) if (std::isprint(*i)) { TRACE(3, "parse: %4ld, 0x%02X (%c), %s", *nparsed, *i, *i, state_str()); } else { TRACE(3, "parse: %4ld, 0x%02X, %s", *nparsed, *i, state_str()); } #endif switch (state_) { case MESSAGE_BEGIN: contentLength_ = -1; switch (mode_) { case REQUEST: state_ = REQUEST_LINE_BEGIN; versionMajor_ = 0; versionMinor_ = 0; break; case RESPONSE: state_ = STATUS_LINE_BEGIN; code_ = 0; versionMajor_ = 0; versionMinor_ = 0; break; case MESSAGE: state_ = HEADER_NAME_BEGIN; // an internet message has no special top-line, // so we just invoke the callback right away if (!onMessageBegin()) goto done; break; } break; case REQUEST_LINE_BEGIN: if (isToken(*i)) { state_ = REQUEST_METHOD; method_ = chunk.ref(*nparsed - initialOutOffset, 1); ++*nparsed; ++i; } else { state_ = SYNTAX_ERROR; } break; case REQUEST_METHOD: if (*i == SP) { state_ = REQUEST_ENTITY_BEGIN; ++*nparsed; ++i; } else if (!isToken(*i)) { state_ = SYNTAX_ERROR; } else { method_.shr(); ++*nparsed; ++i; } break; case REQUEST_ENTITY_BEGIN: if (std::isprint(*i)) { entity_ = chunk.ref(*nparsed - initialOutOffset, 1); state_ = REQUEST_ENTITY; ++*nparsed; ++i; } else { state_ = SYNTAX_ERROR; } break; case REQUEST_ENTITY: if (*i == SP) { state_ = REQUEST_PROTOCOL_BEGIN; ++*nparsed; ++i; } else if (std::isprint(*i)) { entity_.shr(); ++*nparsed; ++i; } else state_ = SYNTAX_ERROR; break; case REQUEST_PROTOCOL_BEGIN: if (*i 
!= 'H') { state_ = SYNTAX_ERROR; } else { state_ = REQUEST_PROTOCOL_T1; ++*nparsed; ++i; } break; case REQUEST_PROTOCOL_T1: if (*i != 'T') { state_ = SYNTAX_ERROR; } else { state_ = REQUEST_PROTOCOL_T2; ++*nparsed; ++i; } break; case REQUEST_PROTOCOL_T2: if (*i != 'T') { state_ = SYNTAX_ERROR; } else { state_ = REQUEST_PROTOCOL_P; ++*nparsed; ++i; } break; case REQUEST_PROTOCOL_P: if (*i != 'P') { state_ = SYNTAX_ERROR; } else { state_ = REQUEST_PROTOCOL_SLASH; ++*nparsed; ++i; } break; case REQUEST_PROTOCOL_SLASH: if (*i != '/') { state_ = SYNTAX_ERROR; } else { state_ = REQUEST_PROTOCOL_VERSION_MAJOR; ++*nparsed; ++i; } break; case REQUEST_PROTOCOL_VERSION_MAJOR: if (*i == '.') { state_ = REQUEST_PROTOCOL_VERSION_MINOR; ++*nparsed; ++i; } else if (!std::isdigit(*i)) { state_ = SYNTAX_ERROR; } else { versionMajor_ = versionMajor_ * 10 + *i - '0'; ++*nparsed; ++i; } break; case REQUEST_PROTOCOL_VERSION_MINOR: if (*i == CR) { state_ = REQUEST_LINE_LF; ++*nparsed; ++i; } #if defined(X0_HTTP_SUPPORT_SHORT_LF) else if (*i == LF) { state_ = HEADER_NAME_BEGIN; ++*nparsed; ++i; TRACE(2, "request-line: method=%s, entity=%s, vmaj=%d, vmin=%d", method_.str().c_str(), entity_.str().c_str(), versionMajor_, versionMinor_); if (!onMessageBegin(method_, entity_, versionMajor_, versionMinor_)) { goto done; } } #endif else if (!std::isdigit(*i)) { state_ = SYNTAX_ERROR; } else { versionMinor_ = versionMinor_ * 10 + *i - '0'; ++*nparsed; ++i; } break; case REQUEST_LINE_LF: if (*i == LF) { state_ = HEADER_NAME_BEGIN; ++*nparsed; ++i; TRACE(2, "request-line: method=%s, entity=%s, vmaj=%d, vmin=%d", method_.str().c_str(), entity_.str().c_str(), versionMajor_, versionMinor_); if (!onMessageBegin(method_, entity_, versionMajor_, versionMinor_)) { goto done; } } else state_ = SYNTAX_ERROR; break; case STATUS_LINE_BEGIN: case STATUS_PROTOCOL_BEGIN: if (*i != 'H') { state_ = SYNTAX_ERROR; } else { state_ = STATUS_PROTOCOL_T1; ++*nparsed; ++i; } break; case STATUS_PROTOCOL_T1: if (*i != 'T') 
{ state_ = SYNTAX_ERROR; } else { state_ = STATUS_PROTOCOL_T2; ++*nparsed; ++i; } break; case STATUS_PROTOCOL_T2: if (*i != 'T') { state_ = SYNTAX_ERROR; } else { state_ = STATUS_PROTOCOL_P; ++*nparsed; ++i; } break; case STATUS_PROTOCOL_P: if (*i != 'P') { state_ = SYNTAX_ERROR; } else { state_ = STATUS_PROTOCOL_SLASH; ++*nparsed; ++i; } break; case STATUS_PROTOCOL_SLASH: if (*i != '/') { state_ = SYNTAX_ERROR; } else { state_ = STATUS_PROTOCOL_VERSION_MAJOR; ++*nparsed; ++i; } break; case STATUS_PROTOCOL_VERSION_MAJOR: if (*i == '.') { state_ = STATUS_PROTOCOL_VERSION_MINOR; ++*nparsed; ++i; } else if (!std::isdigit(*i)) { state_ = SYNTAX_ERROR; } else { versionMajor_ = versionMajor_ * 10 + *i - '0'; ++*nparsed; ++i; } break; case STATUS_PROTOCOL_VERSION_MINOR: if (*i == SP) { state_ = STATUS_CODE_BEGIN; ++*nparsed; ++i; } else if (!std::isdigit(*i)) { state_ = SYNTAX_ERROR; } else { versionMinor_ = versionMinor_ * 10 + *i - '0'; ++*nparsed; ++i; } break; case STATUS_CODE_BEGIN: if (!std::isdigit(*i)) { code_ = SYNTAX_ERROR; break; } state_ = STATUS_CODE; /* fall through */ case STATUS_CODE: if (std::isdigit(*i)) { code_ = code_ * 10 + *i - '0'; ++*nparsed; ++i; } else if (*i == SP) { state_ = STATUS_MESSAGE_BEGIN; ++*nparsed; ++i; } else if (*i == CR) { // no Status-Message passed state_ = STATUS_MESSAGE_LF; ++*nparsed; ++i; } else { state_ = SYNTAX_ERROR; } break; case STATUS_MESSAGE_BEGIN: if (isText(*i)) { state_ = STATUS_MESSAGE; message_ = chunk.ref(*nparsed - initialOutOffset, 1); ++*nparsed; ++i; } else state_ = SYNTAX_ERROR; break; case STATUS_MESSAGE: if (isText(*i) && *i != CR && *i != LF) { message_.shr(); ++*nparsed; ++i; } else if (*i == CR) { state_ = STATUS_MESSAGE_LF; ++*nparsed; ++i; } else { state_ = SYNTAX_ERROR; } break; case STATUS_MESSAGE_LF: if (*i == LF) { state_ = HEADER_NAME_BEGIN; ++*nparsed; ++i; //TRACE(2, "status-line: HTTP/%d.%d, code=%d, message=%s", versionMajor_, versionMinor_, code_, message_.str().c_str()); if 
(!onMessageBegin(versionMajor_, versionMinor_, code_, message_)) { goto done; } } else state_ = SYNTAX_ERROR; break; case HEADER_NAME_BEGIN: if (isToken(*i)) { name_ = chunk.ref(*nparsed - initialOutOffset, 1); state_ = HEADER_NAME; ++*nparsed; ++i; } else if (*i == CR) { state_ = HEADER_END_LF; ++*nparsed; ++i; } #if defined(X0_HTTP_SUPPORT_SHORT_LF) else if (*i == LF) state_ = HEADER_END_LF; #endif else state_ = SYNTAX_ERROR; break; case HEADER_NAME: if (isToken(*i)) { name_.shr(); ++*nparsed; ++i; } else if (*i == ':') { state_ = LWS_BEGIN; lwsNext_ = HEADER_VALUE_BEGIN; lwsNull_ = HEADER_VALUE_END; // only (CR LF) parsed, assume empty value & go on with next header ++*nparsed; ++i; } else if (*i == CR) { state_ = LWS_LF; lwsNext_ = HEADER_COLON; lwsNull_ = SYNTAX_ERROR; ++*nparsed; ++i; } else state_ = SYNTAX_ERROR; break; case HEADER_COLON: if (*i == ':') { state_ = LWS_BEGIN; lwsNext_ = HEADER_VALUE_BEGIN; lwsNull_ = HEADER_VALUE_END; ++*nparsed; ++i; } else state_ = SYNTAX_ERROR; break; case LWS_BEGIN: if (*i == CR) { state_ = LWS_LF; ++*nparsed; ++i; #if defined(X0_HTTP_SUPPORT_SHORT_LF) } else if (*i == LF) { state_ = LWS_SP_HT_BEGIN; ++*nparsed; ++i; #endif } else if (*i == SP || *i == HT) { state_ = LWS_SP_HT; ++*nparsed; ++i; } else if (std::isprint(*i)) { state_ = lwsNext_; } else state_ = SYNTAX_ERROR; break; case LWS_LF: if (*i == LF) { state_ = LWS_SP_HT_BEGIN; ++*nparsed; ++i; } else state_ = SYNTAX_ERROR; break; case LWS_SP_HT_BEGIN: if (*i == SP || *i == HT) { if (!value_.empty()) value_.shr(3); // CR LF (SP | HT) state_ = LWS_SP_HT; ++*nparsed; ++i; } else { // only (CF LF) parsed so far and no 1*(SP | HT) found. 
state_ = lwsNull_; // XXX no nparsed/i-update } break; case LWS_SP_HT: if (*i == SP || *i == HT) { if (!value_.empty()) value_.shr(); ++*nparsed; ++i; } else state_ = lwsNext_; break; case HEADER_VALUE_BEGIN: if (isText(*i)) { value_ = chunk.ref(*nparsed - initialOutOffset, 1); ++*nparsed; ++i; state_ = HEADER_VALUE; } else if (*i == CR) { state_ = HEADER_VALUE_LF; ++*nparsed; ++i; #if defined(X0_HTTP_SUPPORT_SHORT_LF) } else if (*i == LF) { state_ = HEADER_VALUE_END; ++*nparsed; ++i; #endif } else { state_ = SYNTAX_ERROR; } break; case HEADER_VALUE: if (*i == CR) { state_ = LWS_LF; lwsNext_ = HEADER_VALUE; lwsNull_ = HEADER_VALUE_END; ++*nparsed; ++i; } #if defined(X0_HTTP_SUPPORT_SHORT_LF) else if (*i == LF) { state_ = LWS_SP_HT_BEGIN; lwsNext_ = HEADER_VALUE; lwsNull_ = HEADER_VALUE_END; ++*nparsed; ++i; } #endif else if (isText(*i)) { value_.shr(); ++*nparsed; ++i; } else state_ = SYNTAX_ERROR; break; case HEADER_VALUE_LF: if (*i == LF) { state_ = HEADER_VALUE_END; ++*nparsed; ++i; } else { state_ = SYNTAX_ERROR; } break; case HEADER_VALUE_END: { TRACE(2, "header: name='%s', value='%s'", name_.str().c_str(), value_.str().c_str()); if (iequals(name_, "Content-Length")) { contentLength_ = value_.toInt(); TRACE(2, "set content length to: %ld", contentLength_); } else if (iequals(name_, "Transfer-Encoding")) { if (iequals(value_, "chunked")) { chunked_ = true; } } bool rv = onMessageHeader(name_, value_); name_.clear(); value_.clear(); // continue with the next header state_ = HEADER_NAME_BEGIN; if (!rv) { goto done; } break; } case HEADER_END_LF: if (*i == LF) { if (isContentExpected()) state_ = CONTENT_BEGIN; else state_ = MESSAGE_BEGIN; ++*nparsed; ++i; if (!onMessageHeaderEnd()) { TRACE(2, "messageHeaderEnd returned false. 
returning `Aborted`-state"); goto done; } if (!isContentExpected() && !onMessageEnd()) { goto done; } } else { state_ = SYNTAX_ERROR; } break; case CONTENT_BEGIN: if (chunked_) state_ = CONTENT_CHUNK_SIZE_BEGIN; else if (contentLength_ >= 0) state_ = CONTENT; else state_ = CONTENT_ENDLESS; break; case CONTENT_ENDLESS: { // body w/o content-length (allowed in simple MESSAGE types only) BufferRef c(chunk.ref(*nparsed - initialOutOffset)); //TRACE(2, "prepared content-chunk (%ld bytes): %s", c.size(), c.str().c_str()); *nparsed += c.size(); i += c.size(); bool rv = filters_.empty() ? onMessageContent(c) : onMessageContent(filters_.process(c).ref()); if (!rv) goto done; break; } case CONTENT: { // fixed size content length std::size_t offset = *nparsed - initialOutOffset; std::size_t chunkSize = std::min(static_cast<size_t>(contentLength_), chunk.size() - offset); contentLength_ -= chunkSize; *nparsed += chunkSize; i += chunkSize; bool rv = filters_.empty() ? onMessageContent(chunk.ref(offset, chunkSize)) : onMessageContent(filters_.process(chunk.ref(offset, chunkSize)).ref()); if (contentLength_ == 0) state_ = MESSAGE_BEGIN; if (!rv) goto done; if (state_ == MESSAGE_BEGIN && !onMessageEnd()) goto done; break; } case CONTENT_CHUNK_SIZE_BEGIN: if (!std::isxdigit(*i)) { state_ = SYNTAX_ERROR; break; } state_ = CONTENT_CHUNK_SIZE; contentLength_ = 0; /* fall through */ case CONTENT_CHUNK_SIZE: if (*i == CR) { state_ = CONTENT_CHUNK_LF1; ++*nparsed; ++i; } else if (*i >= '0' && *i <= '9') { contentLength_ = contentLength_ * 16 + *i - '0'; ++*nparsed; ++i; } else if (*i >= 'a' && *i <= 'f') { contentLength_ = contentLength_ * 16 + 10 + *i - 'a'; ++*nparsed; ++i; } else if (*i >= 'A' && *i <= 'F') { contentLength_ = contentLength_ * 16 + 10 + *i - 'A'; ++*nparsed; ++i; } else { state_ = SYNTAX_ERROR; } break; case CONTENT_CHUNK_LF1: if (*i != LF) { state_ = SYNTAX_ERROR; } else { //TRACE(2, "content_length: %ld", contentLength_); if (contentLength_ != 0) state_ = 
CONTENT_CHUNK_BODY; else state_ = CONTENT_CHUNK_CR3; ++*nparsed; ++i; } break; case CONTENT_CHUNK_BODY: if (contentLength_) { std::size_t offset = *nparsed - initialOutOffset; std::size_t chunkSize = std::min(static_cast<size_t>(contentLength_), chunk.size() - offset); contentLength_ -= chunkSize; *nparsed += chunkSize; i += chunkSize; bool rv = filters_.empty() ? onMessageContent(chunk.ref(offset, chunkSize)) : onMessageContent(filters_.process(chunk.ref(offset, chunkSize)).ref()); if (!rv) { goto done; } } else if (*i == CR) { state_ = CONTENT_CHUNK_LF2; ++*nparsed; ++i; } break; case CONTENT_CHUNK_LF2: if (*i != LF) { state_ = SYNTAX_ERROR; } else { state_ = CONTENT_CHUNK_SIZE; ++*nparsed; ++i; } break; case CONTENT_CHUNK_CR3: if (*i != CR) { state_ = SYNTAX_ERROR; } else { state_ = CONTENT_CHUNK_LF3; ++*nparsed; ++i; } break; case CONTENT_CHUNK_LF3: if (*i != LF) { state_ = SYNTAX_ERROR; } else { ++*nparsed; ++i; if (!onMessageEnd()) goto done; state_ = MESSAGE_BEGIN; } break; case SYNTAX_ERROR: { #if !defined(XZERO_NDEBUG) TRACE(1, "parse: syntax error"); if (std::isprint(*i)) { TRACE(1, "parse: syntax error at nparsed: %ld, character: '%c'", *nparsed, *i); } else { TRACE(1, "parse: syntax error at nparsed: %ld, character: 0x%02X", *nparsed, *i); } chunk.dump("request chunk (at syntax error)"); #endif goto done; } default: #if !defined(XZERO_NDEBUG) TRACE(1, "parse: unknown state %i", state_); if (std::isprint(*i)) { TRACE(1, "parse: internal error at nparsed: %ld, character: '%c'", *nparsed, *i); } else { TRACE(1, "parse: internal error at nparsed: %ld, character: 0x%02X", *nparsed, *i); } Buffer::dump(chunk.data(), chunk.size(), "request chunk (at unknown state)"); #endif goto done; } } // we've reached the end of the chunk if (state_ == CONTENT_BEGIN) { // we've just parsed all headers but no body yet. 
if (contentLength_ < 0 && !chunked_ && mode_ != MESSAGE) { // and there's no body to come if (!onMessageEnd()) goto done; // subsequent calls to process() parse next request(s). state_ = MESSAGE_BEGIN; } } done: return *nparsed - initialOutOffset; }