/**
 * Appends five pieces to a string builder (two bstr objects, two C strings
 * and a 4-byte slice of a memory buffer), then checks the piece count, the
 * length and content of the joined string, and that clearing the builder
 * brings the piece count back to zero.
 */
TEST(BstrBuilder, Append) {
    static const char expected[] = "0123456789#abcdefghijklmnopqrstuvwxyz#!@#$";

    bstr_builder_t *builder = bstr_builder_create();
    bstr *digits = bstr_dup_c("0123456789");
    bstr *letters = bstr_dup_c("abcdefghijklmnopqrstuvwxyz");

    // A freshly created builder holds no pieces.
    EXPECT_EQ(0, bstr_builder_size(builder));

    // Five appends: digits, '#', letters, '#', and the first 4 bytes of the symbols.
    bstr_builder_appendn(builder, digits);
    bstr_builder_append_c(builder, "#");
    bstr_builder_appendn(builder, letters);
    bstr_builder_append_c(builder, "#");
    bstr_builder_append_mem(builder, "!@#$%^&*()", 4);

    EXPECT_EQ(5, bstr_builder_size(builder));

    // The joined string is 10 + 1 + 26 + 1 + 4 = 42 bytes long.
    bstr *joined = bstr_builder_to_str(builder);
    EXPECT_EQ(42, bstr_len(joined));
    EXPECT_EQ(0, memcmp(expected, bstr_ptr(joined), 42));
    bstr_free(joined);

    // Clearing drops every piece.
    bstr_builder_clear(builder);
    EXPECT_EQ(0, bstr_builder_size(builder));

    bstr_builder_destroy(builder);
}
/**
 * This method is invoked whenever a piece of data, belonging to a single field (name or value)
 * becomes available. It will either create a new parameter or store the transient information
 * until a parameter can be created.
 *
 * @param urlenp   Parser whose string builder, state, pending name and parameter table are updated.
 * @param data     Input buffer; the piece occupies bytes [startpos, endpos).
 * @param startpos Offset of the first byte of the piece.
 * @param endpos   Offset one past the last byte of the piece.
 * @param c Should contain -1 if the reason this function is called is because the end of
 *          the current data chunk is reached.
 */
static void htp_urlenp_add_field_piece(htp_urlenp_t *urlenp, unsigned char *data, size_t startpos, size_t endpos, int c) {
    // Length of the current piece. Computed without an unsigned subtraction that
    // could wrap, and forced to zero when there is no buffer at all.
    const size_t piece_len = ((data != NULL) && (endpos > startpos)) ? (endpos - startpos) : 0;

    // The field has not ended and more input may follow: keep a copy of
    // the piece in the string builder and wait for the rest.
    if ((c == -1) && (!urlenp->_complete)) {
        if (piece_len > 0) {
            bstr_builder_append_mem(urlenp->_bb, (char *) data + startpos, piece_len);
        }

        return;
    }

    // The field ended, or all of the input data has been used: assemble the field.
    bstr *field = NULL;

    // Did we use the string builder for this field?
    if (bstr_builder_size(urlenp->_bb) > 0) {
        // The current field consists of more than one piece,
        // we have to use the string builder

        // Add current piece to string builder
        if (piece_len > 0) {
            bstr_builder_append_mem(urlenp->_bb, (char *) data + startpos, piece_len);
        }

        // Generate the field and clear the string builder. The builder is
        // cleared even when generation fails, so that the pieces of a lost
        // field cannot end up glued to the front of the next field.
        field = bstr_builder_to_str(urlenp->_bb);
        bstr_builder_clear(urlenp->_bb);
    } else if (piece_len > 0) {
        // We only have the current piece to work with, so
        // no need to involve the string builder
        field = bstr_dup_mem((char *) data + startpos, piece_len);
    } else {
        // Empty field; avoid pointer arithmetic on a possibly NULL buffer.
        field = bstr_dup_c("");
    }

    if (field == NULL) return;

    // Process the field differently, depending on the current state
    if (urlenp->_state == HTP_URLENP_STATE_KEY) {
        // Store the name for later
        urlenp->_name = field;

        if (urlenp->_complete) {
            // Param with key but no value
            bstr *name = urlenp->_name;
            bstr *value = bstr_dup_c("");

            // Never add a NULL value to the table. The name stays
            // referenced by urlenp->_name, so it is not lost here.
            if (value == NULL) return;

            if (urlenp->decode_url_encoding) {
                // htp_uriencoding_normalize_inplace(name);
                htp_decode_urlencoded_inplace(urlenp->tx->connp->cfg, urlenp->tx, name);
            }

            table_addn(urlenp->params, name, value);
            urlenp->_name = NULL;

#ifdef HTP_DEBUG
            fprint_raw_data(stderr, "NAME", (unsigned char *) bstr_ptr(name), bstr_len(name));
            fprint_raw_data(stderr, "VALUE", (unsigned char *) bstr_ptr(value), bstr_len(value));
#endif
        }
    } else {
        // Param with key and value
        bstr *name = urlenp->_name;
        bstr *value = field;

        if (urlenp->decode_url_encoding) {
            htp_decode_urlencoded_inplace(urlenp->tx->connp->cfg, urlenp->tx, name);
            htp_decode_urlencoded_inplace(urlenp->tx->connp->cfg, urlenp->tx, value);
        }

        table_addn(urlenp->params, name, value);
        urlenp->_name = NULL;

#ifdef HTP_DEBUG
        fprint_raw_data(stderr, "NAME", (unsigned char *) bstr_ptr(name), bstr_len(name));
        fprint_raw_data(stderr, "VALUE", (unsigned char *) bstr_ptr(value), bstr_len(value));
#endif
    }
}
/**
 * Transcode one bstr.
 *
 * The input is converted through a small fixed-size scratch buffer. When the
 * converted data does not fit in one buffer, the filled buffers are collected
 * in a string builder (created on demand) and joined at the end.
 *
 * @param[in] cd      Conversion descriptor; its state is reset before the conversion starts.
 * @param[in] input   String to convert.
 * @param[out] output Receives the newly created converted string on success.
 * @return HTP_OK on success, HTP_ERROR on allocation or conversion failure.
 */
int htp_transcode_bstr(iconv_t cd, bstr *input, bstr **output) {
    // Reset conversion state for every new string
    iconv(cd, NULL, 0, NULL, 0);

    int rc = HTP_ERROR;
    bstr_builder_t *bb = NULL;

    const size_t buflen = 10;
    unsigned char *buf = malloc(buflen);
    if (buf == NULL) {
        return HTP_ERROR;
    }

    const char *inbuf = (const char *) bstr_ptr(input);
    size_t inleft = bstr_len(input);
    char *outbuf = (char *) buf;
    size_t outleft = buflen;

    for (;;) {
        const size_t inleft_before = inleft;

        if (iconv(cd, (ICONV_CONST char **) &inbuf, &inleft, (char **) &outbuf, &outleft) != (size_t) - 1) {
            // Everything has been converted.
            break;
        }

        // Anything other than a full output buffer is a conversion error.
        if (errno != E2BIG) goto cleanup;

        // The output buffer was empty when iconv() was called. If it still
        // reports E2BIG without having consumed any input or produced any
        // output, retrying with the same empty buffer can never succeed;
        // fail instead of looping forever.
        if ((outleft == buflen) && (inleft == inleft_before)) goto cleanup;

        // Create bstr builder on-demand
        if (bb == NULL) {
            bb = bstr_builder_create();
            if (bb == NULL) goto cleanup;
        }

        // The output buffer is full; stash it and start over with an empty
        // buffer, as there's more work to do.
        bstr_builder_append_mem(bb, buf, buflen - outleft);
        outbuf = (char *) buf;
        outleft = buflen;
    }

    if (bb != NULL) {
        // Add the final (partial) buffer and join all the pieces.
        bstr_builder_append_mem(bb, buf, buflen - outleft);
        *output = bstr_builder_to_str(bb);
    } else {
        // The whole result fits in the scratch buffer.
        *output = bstr_dup_mem(buf, buflen - outleft);
    }

    if (*output != NULL) rc = HTP_OK;

cleanup:
    if (bb != NULL) bstr_builder_destroy(bb);
    free(buf);

    return rc;
}
/**
 * This method is invoked whenever a piece of data, belonging to a single field (name or value)
 * becomes available. It will either create a new parameter or store the transient information
 * until a parameter can be created.
 *
 * @param[in] urlenp    Parser whose string builder, pending name and parameter table are updated.
 * @param[in] data      Input buffer, may be NULL; the piece occupies bytes [startpos, endpos).
 * @param[in] startpos  Offset of the first byte of the piece.
 * @param[in] endpos    Offset one past the last byte of the piece.
 * @param[in] last_char Should contain -1 if the reason this function is called is because the end of
 *                      the current data chunk is reached.
 */
static void htp_urlenp_add_field_piece(htp_urlenp_t *urlenp, const unsigned char *data, size_t startpos, size_t endpos, int last_char) {
    // Length of the current piece. Computed without an unsigned subtraction that
    // could wrap, and forced to zero when there is no buffer at all.
    const size_t piece_len = ((data != NULL) && (endpos > startpos)) ? (endpos - startpos) : 0;

    // Add field if we know it ended (last_char is something other than -1)
    // or if we know that there won't be any more input data (urlenp->_complete is true).
    if ((last_char != -1) || (urlenp->_complete)) {
        // Prepare the field value, assembling from multiple pieces as necessary.

        bstr *field = NULL;

        // Did we use the string builder for this field?
        if (bstr_builder_size(urlenp->_bb) > 0) {
            // The current field consists of more than one piece, we have to use the string builder.

            // Add current piece to string builder.
            if (piece_len > 0) {
                bstr_builder_append_mem(urlenp->_bb, data + startpos, piece_len);
            }

            // Generate the field and clear the string builder. The builder is
            // cleared even when generation fails, so that the pieces of a lost
            // field cannot end up glued to the front of the next field.
            field = bstr_builder_to_str(urlenp->_bb);
            bstr_builder_clear(urlenp->_bb);
            if (field == NULL) return;
        } else {
            // We only have the current piece to work with, so no need to involve the string builder.
            // An empty piece leaves field as NULL.
            if (piece_len > 0) {
                field = bstr_dup_mem(data + startpos, piece_len);
                if (field == NULL) return;
            }
        }

        // Process field as key or value, as appropriate.

        if (urlenp->_state == HTP_URLENP_STATE_KEY) {
            // Key.

            // If there is no more work left to do, then we have a single key. Add it.
            if ((urlenp->_complete) || (last_char == urlenp->argument_separator)) {

                // Handling empty pairs is tricky. We don't want to create a pair for
                // an entirely empty input, but in some cases it may be appropriate
                // (e.g., /index.php?&q=2).
                if ((field != NULL) || (last_char == urlenp->argument_separator)) {
                    // Add one pair, with an empty value and possibly empty key too.

                    bstr *name = field;
                    if (name == NULL) {
                        name = bstr_dup_c("");
                        if (name == NULL) return;
                    }

                    bstr *value = bstr_dup_c("");
                    if (value == NULL) {
                        bstr_free(name);
                        return;
                    }

                    if (urlenp->decode_url_encoding) {
                        htp_tx_urldecode_params_inplace(urlenp->tx, name);
                    }

                    htp_table_addn(urlenp->params, name, value);

                    urlenp->_name = NULL;

#ifdef HTP_DEBUG
                    fprint_raw_data(stderr, "NAME", bstr_ptr(name), bstr_len(name));
                    fprint_raw_data(stderr, "VALUE", bstr_ptr(value), bstr_len(value));
#endif
                }
            } else {
                // This key will possibly be followed by a value, so keep it for later.
                urlenp->_name = field;
            }
        } else {
            // Value (with a key remembered from before).

            bstr *name = urlenp->_name;
            urlenp->_name = NULL;

            if (name == NULL) {
                name = bstr_dup_c("");
                if (name == NULL) {
                    bstr_free(field);
                    return;
                }
            }

            bstr *value = field;
            if (value == NULL) {
                value = bstr_dup_c("");
                if (value == NULL) {
                    bstr_free(name);
                    return;
                }
            }

            if (urlenp->decode_url_encoding) {
                htp_tx_urldecode_params_inplace(urlenp->tx, name);
                htp_tx_urldecode_params_inplace(urlenp->tx, value);
            }

            htp_table_addn(urlenp->params, name, value);

#ifdef HTP_DEBUG
            fprint_raw_data(stderr, "NAME", bstr_ptr(name), bstr_len(name));
            fprint_raw_data(stderr, "VALUE", bstr_ptr(value), bstr_len(value));
#endif
        }
    } else {
        // The field has not ended. We'll make a copy of the available data for later.
        if (piece_len > 0) {
            bstr_builder_append_mem(urlenp->_bb, data + startpos, piece_len);
        }
    }
}
/**
 * Parses a chunk of multipart/form-data data. This function should be called
 * as many times as necessary until all data has been consumed.
 *
 * The parser state is kept in mpartp->state between invocations, so a chunk
 * may end in any state and parsing resumes there with the next chunk.
 *
 * @param mpartp Parser instance.
 * @param data   Chunk of input data.
 * @param len    Number of bytes available in data.
 * @return Status indicator; this function always returns 1.
 */
int htp_mpartp_parse(htp_mpartp_t *mpartp, unsigned char *data, size_t len) {
    size_t pos = 0; // Current position in the input chunk.
    size_t startpos = 0; // The starting position of data.
    size_t data_return_pos = 0; // The position of the (possible) boundary.

    // Loop while there's data in the buffer
    while (pos < len) {
STATE_SWITCH:
        switch (mpartp->state) {

            case MULTIPART_STATE_DATA:
                // A CR was set aside at the end of the previous chunk; hand it over now.
                if ((pos == 0) && (mpartp->cr_aside) && (pos < len)) {
                    mpartp->handle_data(mpartp, (unsigned char *) &"\r", 1, 0);
                    mpartp->cr_aside = 0;
                }

                // Loop through available data
                while (pos < len) {
                    if (data[pos] == CR) {
                        // We have a CR byte

                        // Is this CR the last byte?
                        if (pos + 1 == len) {
                            // We have CR as the last byte in input. We are going to process
                            // what we have in the buffer as data, except for the CR byte,
                            // which we're going to leave for later. If it happens that a
                            // CR is followed by a LF and then a boundary, the CR is going
                            // to be discarded.
                            pos++; // Take CR from input
                            mpartp->cr_aside = 1;
                        } else {
                            // We have CR and at least one more byte in the buffer, so we
                            // are able to test for the LF byte too.
                            if (data[pos + 1] == LF) {
                                pos += 2; // Take CR and LF from input

                                // Prepare to switch to boundary testing
                                data_return_pos = pos;
                                mpartp->boundarypos = pos - startpos;
                                mpartp->bpos = 2; // After LF/first dash
                                mpartp->state = MULTIPART_STATE_BOUNDARY;

                                goto STATE_SWITCH;
                            } else {
                                pos++;
                                mpartp->cr_aside = 0;
                            }
                        }
                    } else if (data[pos] == LF) {
                        // Possible boundary start position (LF line)
                        pos++; // Take LF from input

                        // Prepare to switch to boundary testing
                        data_return_pos = pos;
                        mpartp->boundarypos = pos - startpos;
                        mpartp->bpos = 2; // After LF/first dash
                        mpartp->state = MULTIPART_STATE_BOUNDARY;

                        goto STATE_SWITCH;
                    } else {
                        // Take one byte from input
                        pos++;
                        mpartp->cr_aside = 0;
                    }
                } // while

                // End of data; process data chunk
                mpartp->handle_data(mpartp, data + startpos, pos - startpos - mpartp->cr_aside, 0);

                break;

            case MULTIPART_STATE_BOUNDARY:
                // Possible boundary
                while (pos < len) {
                    // Remember the first byte in the new line; we'll need to
                    // determine if the line is a part of a folded header.
                    if (mpartp->bpos == 2) {
                        mpartp->first_boundary_byte = data[pos];
                    }

                    // Check if the bytes match
                    if (!(tolower((int) data[pos]) == mpartp->boundary[mpartp->bpos])) {
                        // Boundary mismatch

                        // Process stored data
                        htp_martp_process_aside(mpartp, 0);

                        // Return back where DATA parsing left off
                        if (mpartp->current_mode == MULTIPART_MODE_LINE) {
                            // In line mode, we process the line
                            mpartp->handle_data(mpartp, data + startpos, data_return_pos - startpos, 1);
                            startpos = data_return_pos;
                        } else {
                            // In data mode, we go back where we left off
                            pos = data_return_pos;
                        }

                        mpartp->state = MULTIPART_STATE_DATA;

                        goto STATE_SWITCH;
                    }

                    // Consume one matched boundary byte
                    pos++;

                    // Have we seen all boundary bytes?
                    if (++mpartp->bpos == mpartp->boundary_len) {
                        // Boundary match!

                        // Process stored data
                        htp_martp_process_aside(mpartp, 1);

                        // Process data prior to the boundary in the local chunk. Because
                        // we know this is the last chunk before boundary, we can remove
                        // the line endings
                        size_t dlen = data_return_pos - startpos;
                        if ((dlen > 0) && (data[startpos + dlen - 1] == LF)) dlen--;
                        if ((dlen > 0) && (data[startpos + dlen - 1] == CR)) dlen--;
                        mpartp->handle_data(mpartp, data + startpos, dlen, 1);

                        // Keep track of how many boundaries we've seen.
                        mpartp->boundary_count++;

                        // Run boundary match.
                        mpartp->handle_boundary(mpartp);

                        // We now need to check if this is the last boundary in the payload
                        mpartp->state = MULTIPART_STATE_BOUNDARY_IS_LAST2;

                        // The last boundary byte may also have been the last byte of
                        // this chunk. Jumping to STATE_SWITCH skips the loop condition,
                        // and the next state reads data[pos], so stop here when the
                        // chunk is exhausted; the state is picked up with the next chunk.
                        if (pos >= len) return 1;

                        goto STATE_SWITCH;
                    }
                } // while

                // No more data in the local chunk; store the unprocessed part for later
                bstr_builder_append_mem(mpartp->boundary_pieces, (char *) data + startpos, len - startpos);

                break;

            case MULTIPART_STATE_BOUNDARY_IS_LAST2:
                // We're expecting two dashes
                if (data[pos] == '-') {
                    // Still hoping!
                    pos++;
                    mpartp->state = MULTIPART_STATE_BOUNDARY_IS_LAST1;
                } else {
                    // Hmpf, it's not the last boundary.
                    mpartp->state = MULTIPART_STATE_BOUNDARY_EAT_LF;
                }
                break;

            case MULTIPART_STATE_BOUNDARY_IS_LAST1:
                // One more dash left to go
                if (data[pos] == '-') {
                    // This is indeed the last boundary in the payload
                    pos++;
                    mpartp->seen_last_boundary = 1;
                    mpartp->state = MULTIPART_STATE_BOUNDARY_EAT_LF;
                } else {
                    // The second character is not a dash. This means that we have
                    // an error in the payload. We should report the error and
                    // continue to eat the rest of the line.
                    // TODO Error
                    mpartp->state = MULTIPART_STATE_BOUNDARY_EAT_LF;
                }
                break;

            case MULTIPART_STATE_BOUNDARY_EAT_LF:
                if (data[pos] == LF) {
                    pos++;
                    startpos = pos;
                    mpartp->state = MULTIPART_STATE_DATA;
                } else {
                    // Error!

                    // Unexpected byte; remain in the same state
                    pos++;
                }
                break;
        } // switch
    }

    return 1;
}
/**
 * Writes the entire buffer to a file descriptor, continuing after short writes.
 *
 * @param fd   Destination file descriptor.
 * @param data Bytes to write.
 * @param len  Number of bytes to write.
 * @return 1 when all bytes have been written, -1 when write() fails or makes no progress.
 */
static int htp_mpart_write_all(int fd, const unsigned char *data, size_t len) {
    while (len > 0) {
        ssize_t written = write(fd, data, len);
        if (written <= 0) return -1;

        data += written;
        len -= (size_t) written;
    }

    return 1;
}

/**
 * Handles part data.
 *
 * In line mode the data is treated as part headers: complete lines are parsed
 * as headers, and an empty line switches the parser to data mode and decides
 * whether the part is a file or a text part. In data mode, text data is
 * collected in the parser's string builder, whereas file data is passed to the
 * file data hook and, when a temporary file is open, written to disk.
 *
 * @param part    Part the data belongs to.
 * @param data    Data buffer.
 * @param len     Number of bytes in data.
 * @param is_line Non-zero when data ends a line.
 * @return 1 on success, -1 on failure (memory allocation, temporary file
 *         creation or writing to the temporary file).
 */
int htp_mpart_part_handle_data(htp_mpart_part_t *part, unsigned char *data, size_t len, int is_line) {
    // TODO We don't actually need the is_line parameter, because we can
    //      discover that ourselves by looking at the last byte in the buffer.

    // Keep track of part length
    part->len += len;

    // We currently do not process the preamble and epilogue parts
    if ((part->type == MULTIPART_PART_PREAMBLE) || (part->type == MULTIPART_PART_EPILOGUE)) return 1;

    if (part->mpartp->current_mode == MULTIPART_MODE_LINE) {
        // Line mode

        // TODO Remove the extra characters from folded lines

        if (is_line) {
            // End of line

            // Ignore the line ending
            if (len > 1) {
                if (data[len - 1] == LF) len--;
                if (data[len - 1] == CR) len--;
            } else if (len > 0) {
                if (data[len - 1] == LF) len--;
            }

            // Is it an empty line?
            if ((len == 0) && ((bstr_builder_size(part->mpartp->part_pieces) == 0))) {
                // Empty line; switch to data mode
                part->mpartp->current_mode = MULTIPART_MODE_DATA;
                htp_mpart_part_process_headers(part);
                // TODO RC

                if (part->file != NULL) {
                    part->type = MULTIPART_PART_FILE;

                    if ((part->mpartp->extract_files) && (part->mpartp->file_count < part->mpartp->extract_limit)) {
                        static const char suffix[] = "/libhtp-multipart-file-XXXXXX";
                        char buf[255];

                        const char *dir = part->mpartp->extract_dir;
                        if (dir == NULL) return -1;

                        // Build the mkstemp() template. An over-long directory is
                        // rejected up front: a truncated template would lose the
                        // trailing XXXXXX, and strncpy() would not NUL-terminate
                        // the buffer for the string functions that follow.
                        size_t dir_len = strlen(dir);
                        if (dir_len + sizeof (suffix) > sizeof (buf)) return -1;

                        memcpy(buf, dir, dir_len);
                        memcpy(buf + dir_len, suffix, sizeof (suffix)); // Includes the NUL.

                        part->file->tmpname = strdup(buf);
                        if (part->file->tmpname == NULL) return -1;

                        part->file->fd = mkstemp(part->file->tmpname);
                        if (part->file->fd < 0) return -1;

                        part->mpartp->file_count++;
                    }
                } else {
                    part->type = MULTIPART_PART_TEXT;
                }
            } else {
                // Not an empty line

                // Is there a folded line coming after this one?
                if ((part->mpartp->first_boundary_byte != ' ') && (part->mpartp->first_boundary_byte != '\t')) {
                    // No folded lines after this one, so process header

                    // Do we have more than one piece?
                    if (bstr_builder_size(part->mpartp->part_pieces) > 0) {
                        // Line in pieces

                        bstr_builder_append_mem(part->mpartp->part_pieces, (char *) data, len);

                        bstr *line = bstr_builder_to_str(part->mpartp->part_pieces);
                        if (line == NULL) return -1;
                        htp_mpartp_parse_header(part, (unsigned char *) bstr_ptr(line), bstr_len(line));
                        // TODO RC
                        bstr_free(&line);

                        bstr_builder_clear(part->mpartp->part_pieces);
                    } else {
                        // Just this line
                        htp_mpartp_parse_header(part, data, len);
                        // TODO RC
                    }

                    part->mpartp->pieces_form_line = 0;
                } else {
                    // Folded line, just store this piece for later
                    bstr_builder_append_mem(part->mpartp->part_pieces, (char *) data, len);
                    part->mpartp->pieces_form_line = 1;
                }
            }
        } else {
            // Not end of line; keep the data chunk for later
            bstr_builder_append_mem(part->mpartp->part_pieces, (char *) data, len);
            part->mpartp->pieces_form_line = 0;
        }
    } else {
        // Data mode; keep the data chunk for later (but not if it is a file)
        switch (part->type) {
            case MULTIPART_PART_TEXT:
                bstr_builder_append_mem(part->mpartp->part_pieces, (char *) data, len);
                break;

            case MULTIPART_PART_FILE:
                htp_mpartp_run_request_file_data_hook(part, data, len);

                // Store data to disk. A failed or stalled write is reported
                // rather than silently leaving a truncated file behind.
                if (part->file->fd != -1) {
                    if (htp_mpart_write_all(part->file->fd, data, len) < 0) return -1;
                }
                break;
        }
    }

    return 1;
}