/**
 * Generic request line parser.
 *
 * Splits the raw request line of the current inbound transaction into
 * up to three whitespace-separated parts and stores copies of them in
 * the transaction: request_method, request_uri and request_protocol.
 * The numeric method and protocol fields are filled in from the copies.
 * When nothing follows the URI, the transaction is flagged as a simple
 * (HTTP/0.9-style) request and no protocol is stored.
 *
 * @param connp
 * @return HTP_OK on success, or HTP_ERROR when bstr_memdup() returns
 *         NULL for any of the parts
 */
int htp_parse_request_line_generic(htp_connp_t *connp) {
    htp_tx_t *tx = connp->in_tx;
    unsigned char *line = (unsigned char *) bstr_ptr(tx->request_line);
    size_t line_len = bstr_len(tx->request_line);
    size_t cur;

    // Method: runs from the start of the line up to the first
    // whitespace character. An empty method is accepted as-is.
    for (cur = 0; (cur < line_len) && (!htp_is_space(line[cur])); cur++) {
        // Scanning only.
    }

    tx->request_method = bstr_memdup((char *) line, cur);
    if (tx->request_method == NULL) {
        return HTP_ERROR;
    }

    tx->request_method_number = htp_convert_method_to_number(tx->request_method);

    // Separator between method and URI. The RFC allows a single SP but
    // suggests tolerating any number of SP and HT; isspace() is used here.
    for (; (cur < line_len) && (isspace(line[cur])); cur++) {
        // Scanning only.
    }

    // URI: runs up to the next whitespace character.
    size_t uri_start = cur;
    for (; (cur < line_len) && (!htp_is_space(line[cur])); cur++) {
        // Scanning only.
    }

    tx->request_uri = bstr_memdup((char *) line + uri_start, cur - uri_start);
    if (tx->request_uri == NULL) {
        return HTP_ERROR;
    }

    // Separator between URI and protocol.
    for (; (cur < line_len) && (htp_is_space(line[cur])); cur++) {
        // Scanning only.
    }

    if (cur == line_len) {
        // Nothing left on the line: treat as an HTTP/0.9 request.
        tx->protocol_is_simple = 1;
        return HTP_OK;
    }

    // Protocol: whatever remains on the line.
    tx->request_protocol = bstr_memdup((char *) line + cur, line_len - cur);
    if (tx->request_protocol == NULL) {
        return HTP_ERROR;
    }

    tx->request_protocol_number = htp_parse_protocol(tx->request_protocol);

    return HTP_OK;
}
/**
 * Parse request line as Apache 2.2 does.
 *
 * Works like the generic parser (method, URI and protocol separated by
 * whitespace, copies stored in the current inbound transaction), with
 * one difference: when the transaction recorded a NUL byte in the
 * request line, the line is treated as ending just before that byte.
 *
 * When nothing follows the URI, the transaction is flagged as a simple
 * (HTTP/0.9-style) request and no protocol is stored.
 *
 * @param connp
 * @return HTP_OK on success, or HTP_ERROR when bstr_memdup() returns
 *         NULL for any of the parts
 */
int htp_parse_request_line_apache_2_2(htp_connp_t *connp) {
    htp_tx_t *tx = connp->in_tx;
    unsigned char *data = (unsigned char *) bstr_ptr(tx->request_line);
    size_t len = bstr_len(tx->request_line);
    size_t pos = 0;

    // In this implementation we assume the
    // line ends with the first NUL byte.
    if (tx->request_line_nul_offset != -1) {
        // The effective length is (offset - 1). Guard against an offset
        // that would make the subtraction wrap, and never let the
        // effective length grow past the stored line.
        size_t nul_len = (tx->request_line_nul_offset > 0)
                ? (size_t) (tx->request_line_nul_offset - 1)
                : 0;

        if (nul_len < len) {
            len = nul_len;
        }
    }

    // The request method starts at the beginning of the
    // line and ends with the first whitespace character.
    while ((pos < len) && (!htp_is_space(data[pos]))) {
        pos++;
    }

    // No, we don't care if the method is empty.
    tx->request_method = bstr_memdup((char *) data, pos);
    if (tx->request_method == NULL) {
        // Allocation failed; do not hand NULL to the helpers below.
        return HTP_ERROR;
    }

#ifdef HTP_DEBUG
    fprint_raw_data(stderr, __FUNCTION__, (unsigned char *) bstr_ptr(tx->request_method), bstr_len(tx->request_method));
#endif

    tx->request_method_number = htp_convert_method_to_number(tx->request_method);

    // Ignore whitespace after request method. The RFC allows
    // for only one SP, but then suggests any number of SP and HT
    // should be permitted. Apache uses isspace(), which is even
    // more permitting, so that's what we use here.
    while ((pos < len) && (isspace(data[pos]))) {
        pos++;
    }

    size_t start = pos;

    // The URI ends with the first whitespace.
    while ((pos < len) && (!htp_is_space(data[pos]))) {
        pos++;
    }

    tx->request_uri = bstr_memdup((char *) data + start, pos - start);
    if (tx->request_uri == NULL) {
        return HTP_ERROR;
    }

#ifdef HTP_DEBUG
    fprint_raw_data(stderr, __FUNCTION__, (unsigned char *) bstr_ptr(tx->request_uri), bstr_len(tx->request_uri));
#endif

    // Ignore whitespace after URI
    while ((pos < len) && (htp_is_space(data[pos]))) {
        pos++;
    }

    // Is there protocol information available?
    if (pos == len) {
        // No, this looks like a HTTP/0.9 request.
        tx->protocol_is_simple = 1;
        return HTP_OK;
    }

    // The protocol information spreads until the end of the line.
    tx->request_protocol = bstr_memdup((char *) data + pos, len - pos);
    if (tx->request_protocol == NULL) {
        // Allocation failed; do not hand NULL to htp_parse_protocol().
        return HTP_ERROR;
    }

    tx->request_protocol_number = htp_parse_protocol(tx->request_protocol);

#ifdef HTP_DEBUG
    fprint_raw_data(stderr, __FUNCTION__, (unsigned char *) bstr_ptr(tx->request_protocol), bstr_len(tx->request_protocol));
#endif

    return HTP_OK;
}
/**
 * Parses a message header line as Apache 2.2 does.
 *
 * The line is chomped, then split at the first colon. A NUL byte ends
 * both the search for the colon and the field value. LWS is trimmed
 * after the name and on both sides of the value. Copies of the name and
 * the value are stored in h->name and h->value.
 *
 * Problems that do not prevent parsing (empty name, LWS after the name,
 * name that is not a token) set HTP_FIELD_INVALID on the header and on
 * the transaction; the warning is logged only the first time the
 * transaction gets the flag. A missing colon sets HTP_FIELD_UNPARSEABLE
 * in the same way and fails the parse.
 *
 * @param connp
 * @param h     header to fill in; flags, name and value are written
 * @param data  raw header line
 * @param len   number of bytes in data
 * @return HTP_OK on success; HTP_ERROR when the colon is missing or when
 *         bstr_memdup() returns NULL (h->name may already be set in the
 *         latter case and is left for the owner of h to release)
 */
int htp_parse_request_header_apache_2_2(htp_connp_t *connp, htp_header_t *h, unsigned char *data, size_t len) {
    htp_chomp(data, &len);

    size_t name_start = 0;

    // Look for the colon
    size_t colon_pos = 0;
    while ((colon_pos < len) && (data[colon_pos] != '\0') && (data[colon_pos] != ':')) {
        colon_pos++;
    }

    if ((colon_pos == len) || (data[colon_pos] == '\0')) {
        // Missing colon
        h->flags |= HTP_FIELD_UNPARSEABLE;

        if (!(connp->in_tx->flags & HTP_FIELD_UNPARSEABLE)) {
            connp->in_tx->flags |= HTP_FIELD_UNPARSEABLE;
            // Only log once per transaction
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Request field invalid: colon missing");
        }

        return HTP_ERROR;
    }

    if (colon_pos == 0) {
        // Empty header name
        h->flags |= HTP_FIELD_INVALID;

        if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
            connp->in_tx->flags |= HTP_FIELD_INVALID;
            // Only log once per transaction
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: empty name");
        }
    }

    size_t name_end = colon_pos;

    // Ignore LWS after field-name. The test is written against name_end
    // itself so that an empty name (name_end == 0) never produces a
    // wrapped index; the first byte of the name is never trimmed.
    while ((name_end > name_start + 1) && (htp_is_lws(data[name_end - 1]))) {
        name_end--;

        h->flags |= HTP_FIELD_INVALID;

        if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
            connp->in_tx->flags |= HTP_FIELD_INVALID;
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: LWS after name");
        }
    }

    // Value
    size_t value_start = colon_pos;

    // Go over the colon
    if (value_start < len) {
        value_start++;
    }

    // Ignore LWS before field-content
    while ((value_start < len) && (htp_is_lws(data[value_start]))) {
        value_start++;
    }

    // Look for the end of field-content
    size_t value_end = value_start;
    while ((value_end < len) && (data[value_end] != '\0')) {
        value_end++;
    }

    // Ignore LWS after field-content; the first byte of the value is
    // never trimmed.
    while ((value_end > value_start + 1) && (htp_is_lws(data[value_end - 1]))) {
        value_end--;
    }

    // Check that the header name is a token
    for (size_t i = name_start; i < name_end; i++) {
        if (!htp_is_token(data[i])) {
            h->flags |= HTP_FIELD_INVALID;

            if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
                connp->in_tx->flags |= HTP_FIELD_INVALID;
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request header name is not a token");
            }

            break;
        }
    }

    // Now extract the name and the value
    h->name = bstr_memdup((char *) data + name_start, name_end - name_start);
    if (h->name == NULL) {
        return HTP_ERROR;
    }

    h->value = bstr_memdup((char *) data + value_start, value_end - value_start);
    if (h->value == NULL) {
        return HTP_ERROR;
    }

    return HTP_OK;
}
/**
 * Completes the normalized URI of the current inbound transaction:
 * supplies "http" when no scheme was parsed, forces the port number to
 * the connection's local port whenever the parsed one is absent, invalid
 * or different, and supplies "/" when no path was parsed.
 *
 * @param connp
 * @return HTP_OK on success, HTP_ERROR when bstr_cstrdup() returns NULL
 */
static int htp_req_line_finalize_parsed_uri(htp_connp_t *connp) {
    htp_tx_t *tx = connp->in_tx;

    // Scheme
    if (tx->parsed_uri->scheme == NULL) {
        tx->parsed_uri->scheme = bstr_cstrdup("http");
        if (tx->parsed_uri->scheme == NULL) {
            return HTP_ERROR;
        }
    } else if (bstr_cmpc(tx->parsed_uri->scheme, "http") != 0) {
        // TODO Invalid scheme
    }

    // Port: the port on which the client is talking to the server wins
    // when the URI carries no port, an invalid port (-1), or a port
    // that differs from it.
    if ((tx->parsed_uri->port == NULL)
            || (tx->parsed_uri->port_number == -1)
            || (tx->parsed_uri->port_number != connp->conn->local_port)) {
        // TODO Log (invalid or incorrect port supplied in the URI)
        tx->parsed_uri->port_number = connp->conn->local_port;
    }

    // Path
    if (tx->parsed_uri->path == NULL) {
        tx->parsed_uri->path = bstr_cstrdup("/");
        if (tx->parsed_uri->path == NULL) {
            return HTP_ERROR;
        }
    }

    return HTP_OK;
}

/**
 * Parses request line.
 *
 * Consumes input one byte at a time until LF is seen, counting NUL
 * bytes and remembering where the first one occurred. An ignorable line
 * is counted and dropped. Otherwise the chomped line is stored in the
 * transaction, handed to the configured request line parser, and the
 * request URI is parsed (as an authority for CONNECT; as a URI that is
 * then normalized and finalized for every other method). The
 * REQUEST_URI_NORMALIZE and REQUEST_LINE hooks are run along the way.
 *
 * @param connp
 * @returns HTP_OK on state change or after an ignorable line, HTP_ERROR
 *          on error, or HTP_DATA when more data is needed (that return
 *          is not visible in this function; it presumably comes from
 *          IN_COPY_BYTE_OR_RETURN).
 */
int htp_connp_REQ_LINE(htp_connp_t *connp) {
    for (;;) {
        // Get one byte
        IN_COPY_BYTE_OR_RETURN(connp);

        // Keep track of NUL bytes: how many, and where the first one was
        if (connp->in_next_byte == 0) {
            connp->in_tx->request_line_nul++;

            if (connp->in_tx->request_line_nul_offset == -1) {
                connp->in_tx->request_line_nul_offset = connp->in_line_len;
            }
        }

        // Keep reading until the end of the line
        if (connp->in_next_byte != LF) {
            continue;
        }

#ifdef HTP_DEBUG
        fprint_raw_data(stderr, __FUNCTION__, connp->in_line, connp->in_line_len);
#endif

        // Empty/whitespace lines are noted, dropped, and parsing starts again
        if (htp_connp_is_line_ignorable(connp, connp->in_line, connp->in_line_len)) {
            connp->in_tx->request_ignored_lines++;

            // TODO How many empty lines are we willing to accept?

            connp->in_line_len = 0;

            return HTP_OK;
        }

        // Store the request line without its line terminator
        htp_chomp(connp->in_line, &connp->in_line_len);
        connp->in_tx->request_line = bstr_memdup((char *) connp->in_line, connp->in_line_len);
        if (connp->in_tx->request_line == NULL) {
            return HTP_ERROR;
        }

        // Split the line; downstream is responsible for error logging
        if (connp->cfg->parse_request_line(connp) != HTP_OK) {
            return HTP_ERROR;
        }

        if (connp->in_tx->request_method_number == M_CONNECT) {
            // CONNECT carries an authority rather than a URI;
            // downstream is responsible for error logging
            if (htp_parse_authority(connp, connp->in_tx->request_uri, &(connp->in_tx->parsed_uri_incomplete)) != HTP_OK) {
                return HTP_ERROR;
            }
        } else {
            // Parse the request URI; downstream is responsible for error logging
            if (htp_parse_uri(connp->in_tx->request_uri, &(connp->in_tx->parsed_uri_incomplete)) != HTP_OK) {
                return HTP_ERROR;
            }

            // The original components are kept; normalization works on
            // a copy that is used internally
            if (htp_normalize_parsed_uri(connp, connp->in_tx->parsed_uri_incomplete, connp->in_tx->parsed_uri)) {
                return HTP_ERROR;
            }

            // Run hook REQUEST_URI_NORMALIZE
            int normalize_rc = hook_run_all(connp->cfg->hook_request_uri_normalize, connp);
            if (normalize_rc != HOOK_OK) {
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                        "Request URI normalize callback returned error (%d)", normalize_rc);
                return HTP_ERROR;
            }

            // request_uri_normalized has to be generated before
            // parsed_uri is finalized, while it is still known which
            // parts came from the request and which were added
            if (connp->cfg->generate_request_uri_normalized) {
                connp->in_tx->request_uri_normalized = htp_unparse_uri_noencode(connp->in_tx->parsed_uri);
                if (connp->in_tx->request_uri_normalized == NULL) {
                    // There's no sense in logging anything on a memory allocation failure
                    return HTP_ERROR;
                }

#ifdef HTP_DEBUG
                fprint_raw_data(stderr, "request_uri_normalized",
                        (unsigned char *) bstr_ptr(connp->in_tx->request_uri_normalized),
                        bstr_len(connp->in_tx->request_uri_normalized));
#endif
            }

            // Finalize parsed_uri (scheme, port, path)
            if (htp_req_line_finalize_parsed_uri(connp) != HTP_OK) {
                return HTP_ERROR;
            }
        }

        // Run hook REQUEST_LINE
        int line_rc = hook_run_all(connp->cfg->hook_request_line, connp);
        if (line_rc != HOOK_OK) {
            htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                    "Request line callback returned error (%d)", line_rc);
            return HTP_ERROR;
        }

        // Clean up and move on to the next phase
        connp->in_line_len = 0;
        connp->in_state = htp_connp_REQ_PROTOCOL;

        return HTP_OK;
    }

    // The loop above contains no break of its own
    return HTP_ERROR;
}
/**
 * Hands the header that is currently being collected (if any) to the
 * configured header processor and marks that no header is pending.
 *
 * @param connp
 * @return HTP_OK when nothing was pending or processing succeeded,
 *         HTP_ERROR when the processor did not return HTP_OK
 */
static int htp_req_headers_flush_pending(htp_connp_t *connp) {
    if (connp->in_header_line_index == -1) {
        return HTP_OK;
    }

    // Note: downstream responsible for error logging
    if (connp->cfg->process_request_header(connp) != HTP_OK) {
        return HTP_ERROR;
    }

    // Reset index
    connp->in_header_line_index = -1;

    return HTP_OK;
}

/**
 * Parses request headers.
 *
 * Consumes input one byte at a time, collecting one raw header line per
 * LF. Each collected line is appended to the transaction's list of
 * request header lines; a non-folded line first causes the previously
 * collected header to be processed. A terminator line processes the
 * last pending header and moves the parser to its next state.
 *
 * @param connp
 * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when
 *          more data is needed (that return is not visible in this
 *          function; it presumably comes from IN_COPY_BYTE_OR_RETURN).
 */
int htp_connp_REQ_HEADERS(htp_connp_t *connp) {
    for (;;) {
        IN_COPY_BYTE_OR_RETURN(connp);

        // Lazily create the record for the line being collected
        if (connp->in_header_line == NULL) {
            connp->in_header_line = calloc(1, sizeof (htp_header_line_t));
            if (connp->in_header_line == NULL) {
                return HTP_ERROR;
            }

            connp->in_header_line->first_nul_offset = -1;
        }

        // Keep track of NUL bytes
        if (connp->in_next_byte == 0) {
            connp->in_header_line->flags |= HTP_FIELD_NUL_BYTE;

            // Store the offset of the first NUL
            if (connp->in_header_line->has_nulls == 0) {
                connp->in_header_line->first_nul_offset = connp->in_line_len;
            }

            // Remember how many NULs there were
            connp->in_header_line->has_nulls++;
        }

        // Keep reading until the end of the line
        if (connp->in_next_byte != LF) {
            continue;
        }

#ifdef HTP_DEBUG
        fprint_raw_data(stderr, __FUNCTION__, connp->in_line, connp->in_line_len);
#endif

        // Should we terminate headers?
        if (htp_connp_is_line_terminator(connp, connp->in_line, connp->in_line_len)) {
            // Parse previous header, if any
            if (htp_req_headers_flush_pending(connp) != HTP_OK) {
                return HTP_ERROR;
            }

            // Cleanup: the record created for the terminator line is not kept
            free(connp->in_header_line);
            connp->in_header_line = NULL;
            connp->in_line_len = 0;

            // We've seen all request headers
            if (connp->in_chunk_count != connp->in_chunk_request_index) {
                connp->in_tx->flags |= HTP_MULTI_PACKET_HEAD;
            }

            // Move onto the next processing phase
            if (connp->in_tx->progress[0] == TX_PROGRESS_REQ_HEADERS) {
                connp->in_state = htp_connp_REQ_CONNECT_CHECK;
                return HTP_OK;
            }

            // Not in the request headers phase: run hook REQUEST_TRAILER
            int trailer_rc = hook_run_all(connp->cfg->hook_request_trailer, connp);
            if (trailer_rc != HOOK_OK) {
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0,
                        "Request trailer callback returned error (%d)", trailer_rc);
                return HTP_ERROR;
            }

            // We've completed parsing this request
            connp->in_state = htp_connp_REQ_IDLE;
            connp->in_tx->progress[0] = TX_PROGRESS_WAIT;

            return HTP_OK;
        }

        // Prepare line for consumption, remembering its unchomped length
        size_t raw_in_line_len = connp->in_line_len;
        htp_chomp(connp->in_line, &connp->in_line_len);

        // Check for header folding
        if (htp_connp_is_line_folded(connp->in_line, connp->in_line_len) == 0) {
            // New header line: parse previous header, if any
            if (htp_req_headers_flush_pending(connp) != HTP_OK) {
                return HTP_ERROR;
            }

            // Remember the index of the first header line
            connp->in_header_line_index = connp->in_header_line_counter;
        } else if (connp->in_header_line_index == -1) {
            // Folding without a previous header line to add to;
            // warn once per transaction
            if (!(connp->in_tx->flags & HTP_INVALID_FOLDING)) {
                connp->in_tx->flags |= HTP_INVALID_FOLDING;
                htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Invalid request field folding");
            }
        }

        // Record the line terminator only when it is something other
        // than nothing at all or a plain CR LF pair
        if (raw_in_line_len <= connp->in_line_len) {
            connp->in_header_line->terminators = NULL;
        } else if ((raw_in_line_len - connp->in_line_len == 2)
                && (connp->in_line[connp->in_line_len] == 0x0d)
                && (connp->in_line[connp->in_line_len + 1] == 0x0a)) {
            connp->in_header_line->terminators = NULL;
        } else {
            connp->in_header_line->terminators = bstr_memdup((char *) connp->in_line + connp->in_line_len,
                    raw_in_line_len - connp->in_line_len);
            if (connp->in_header_line->terminators == NULL) {
                return HTP_ERROR;
            }
        }

        // Add the raw header line to the list
        connp->in_header_line->line = bstr_memdup((char *) connp->in_line, connp->in_line_len);
        if (connp->in_header_line->line == NULL) {
            return HTP_ERROR;
        }

        list_add(connp->in_tx->request_header_lines, connp->in_header_line);
        connp->in_header_line = NULL;

        // Cleanup for the next line
        connp->in_line_len = 0;

        if (connp->in_header_line_index == -1) {
            connp->in_header_line_index = connp->in_header_line_counter;
        }

        connp->in_header_line_counter++;
    }

    // The loop above contains no break of its own
    return HTP_ERROR;
}