// Exercises every append variant of the string builder and checks that the
// assembled string holds the pieces in the order they were appended.
TEST(BstrBuilder, Append) {
    bstr_builder_t *builder = bstr_builder_create();
    bstr *digits = bstr_dup_c("0123456789");
    bstr *letters = bstr_dup_c("abcdefghijklmnopqrstuvwxyz");

    // Nothing has been appended yet.
    EXPECT_EQ(0, bstr_builder_size(builder));

    bstr_builder_appendn(builder, digits);
    bstr_builder_append_c(builder, "#");
    bstr_builder_appendn(builder, letters);
    bstr_builder_append_c(builder, "#");
    // Only the first 4 bytes of this buffer are requested.
    bstr_builder_append_mem(builder, "!@#$%^&*()", 4);

    // Five append calls were made above.
    EXPECT_EQ(5, bstr_builder_size(builder));

    // 10 digits + '#' + 26 letters + '#' + 4 bytes = 42 bytes.
    bstr *joined = bstr_builder_to_str(builder);
    EXPECT_EQ(42, bstr_len(joined));
    EXPECT_EQ(0, memcmp("0123456789#abcdefghijklmnopqrstuvwxyz#!@#$", bstr_ptr(joined), 42));
    bstr_free(joined);

    // Clearing must bring the piece count back to zero.
    bstr_builder_clear(builder);
    EXPECT_EQ(0, bstr_builder_size(builder));

    bstr_builder_destroy(builder);
}
/**
 * Finalize parsing.
 *
 * When a part is still open, the set-aside data is processed (as "no boundary
 * match") and the part's data is finalized. The stored boundary pieces are
 * cleared afterwards, unless finalizing the part failed.
 *
 * @param mpartp
 * @return 1 on success, -1 if finalizing the current part's data failed.
 */
int htp_mpartp_finalize(htp_mpartp_t * mpartp) {
    int rc = 1;

    if (mpartp->current_part != NULL) {
        htp_martp_process_aside(mpartp, 0);

        // TODO RC
        rc = (htp_mpart_part_finalize_data(mpartp->current_part) < 0) ? -1 : 1;
    }

    if (rc < 0) {
        return rc;
    }

    bstr_builder_clear(mpartp->boundary_pieces);

    return 1;
}
/**
 * Creates new multipart part.
 *
 * The part is zero-allocated, given a fresh header table and linked to the
 * supplied parser. The parser's pieces_form_line flag is reset and its
 * part_pieces builder is cleared as part of the creation.
 *
 * @param mpartp
 * @return The new part, or NULL if the part or its header table could not
 *         be allocated.
 */
htp_mpart_part_t *htp_mpart_part_create(htp_mpartp_t *mpartp) {
    htp_mpart_part_t *new_part = calloc(1, sizeof (*new_part));
    if (!new_part) {
        return NULL;
    }

    new_part->headers = table_create(4);
    if (!new_part->headers) {
        free(new_part);
        return NULL;
    }

    // Link the part to its parser, then reset the parser's per-part state.
    new_part->mpartp = mpartp;
    mpartp->pieces_form_line = 0;
    bstr_builder_clear(mpartp->part_pieces);

    return new_part;
}
/**
 * Finalizes part processing.
 *
 * Preamble and epilogue parts are ignored. For a text part, the pieces
 * collected in the parser's part_pieces builder are assembled into
 * part->value and the builder is cleared. For a file part, the file data
 * hook is invoked with a NULL/zero-length buffer and the file descriptor,
 * if any, is closed.
 *
 * @param part
 * @return 1 on success, -1 if the value of a text part could not be
 *         assembled (memory allocation failure).
 */
int htp_mpart_part_finalize_data(htp_mpart_part_t *part) {
    // We currently do not process the preamble and epilogue parts
    if ((part->type == MULTIPART_PART_PREAMBLE) || (part->type == MULTIPART_PART_EPILOGUE)) return 1;

    if (part->type == MULTIPART_PART_TEXT) {
        if (bstr_builder_size(part->mpartp->part_pieces) > 0) {
            part->value = bstr_builder_to_str(part->mpartp->part_pieces);

            // The pieces are dropped whether or not the value could be built,
            // so that they cannot be mixed into whatever is stored next.
            bstr_builder_clear(part->mpartp->part_pieces);

            // Report the failure instead of silently leaving the part
            // without a value.
            if (part->value == NULL) return -1;
        }
    } else if (part->type == MULTIPART_PART_FILE) {
        // NOTE(review): a NULL/0 call presumably tells the hook that the
        // file is complete -- confirm against the hook's contract.
        htp_mpartp_run_request_file_data_hook(part, NULL, 0);

        if (part->file->fd != -1) {
            close(part->file->fd);
        }
    }

    return 1;
}
/**
 * This method is invoked whenever a piece of data, belonging to a single field (name or value)
 * becomes available. It will either create a new parameter or store the transient information
 * until a parameter can be created.
 *
 * If the field is not known to have ended, the piece is only copied into the
 * parser's string builder. Otherwise the field is assembled (from the builder
 * plus the current piece, or from the current piece alone) and then either
 * remembered as a parameter name or added, together with the remembered name,
 * to the parameter table. The function returns without adding anything if a
 * memory allocation fails.
 *
 * @param urlenp
 * @param data
 * @param startpos
 * @param endpos
 * @param c Should contain -1 if the reason this function is called is because the end of
 *          the current data chunk is reached.
 */
static void htp_urlenp_add_field_piece(htp_urlenp_t *urlenp, unsigned char *data, size_t startpos, size_t endpos, int c) {
    // Add field if we know it ended or if we know that
    // we've used all of the input data
    if ((c != -1) || (urlenp->_complete)) {
        // Add field
        bstr *field = NULL;

        // Did we use the string builder for this field?
        if (bstr_builder_size(urlenp->_bb) > 0) {
            // The current field consists of more than one piece,
            // we have to use the string builder

            // Add current piece to string builder
            if (endpos - startpos > 0) {
                bstr_builder_append_mem(urlenp->_bb, (char *) data + startpos, endpos - startpos);
            }

            // Generate the field and clear the string builder
            field = bstr_builder_to_str(urlenp->_bb);
            if (field == NULL) return;

            bstr_builder_clear(urlenp->_bb);
        } else {
            // We only have the current piece to work with, so
            // no need to involve the string builder
            field = bstr_dup_mem((char *) data + startpos, endpos - startpos);
            if (field == NULL) return;
        }

        // Process the field differently, depending on the current state
        if (urlenp->_state == HTP_URLENP_STATE_KEY) {
            // Store the name for later
            urlenp->_name = field;

            if (urlenp->_complete) {
                // Param with key but no value
                bstr *name = urlenp->_name;
                bstr *value = bstr_dup_c("");

                // Without a value there is nothing valid to add. The name
                // stays in urlenp->_name, so it is still reachable through
                // the parser and is not leaked here.
                if (value == NULL) return;

                if (urlenp->decode_url_encoding) {
                    htp_decode_urlencoded_inplace(urlenp->tx->connp->cfg, urlenp->tx, name);
                }

                table_addn(urlenp->params, name, value);
                urlenp->_name = NULL;

                #ifdef HTP_DEBUG
                fprint_raw_data(stderr, "NAME", (unsigned char *) bstr_ptr(name), bstr_len(name));
                fprint_raw_data(stderr, "VALUE", (unsigned char *) bstr_ptr(value), bstr_len(value));
                #endif
            }
        } else {
            // Param with key and value
            // NOTE(review): assumes a preceding key piece has set
            // urlenp->_name -- confirm against the parser's state machine.
            bstr *name = urlenp->_name;
            bstr *value = field;

            if (urlenp->decode_url_encoding) {
                htp_decode_urlencoded_inplace(urlenp->tx->connp->cfg, urlenp->tx, name);
                htp_decode_urlencoded_inplace(urlenp->tx->connp->cfg, urlenp->tx, value);
            }

            table_addn(urlenp->params, name, value);
            urlenp->_name = NULL;

            #ifdef HTP_DEBUG
            fprint_raw_data(stderr, "NAME", (unsigned char *) bstr_ptr(name), bstr_len(name));
            fprint_raw_data(stderr, "VALUE", (unsigned char *) bstr_ptr(value), bstr_len(value));
            #endif
        }
    } else {
        // Make a copy of the data and store it in an array for later
        if (endpos - startpos > 0) {
            bstr_builder_append_mem(urlenp->_bb, (char *) data + startpos, endpos - startpos);
        }
    }
}
/**
 * Called each time a piece of data belonging to a single field (a parameter
 * name or a parameter value) becomes available. Depending on whether the field
 * is known to be finished, the piece is either buffered for later or used to
 * complete the field, which is then remembered as a name or added to the
 * parameter table as a name/value pair.
 *
 * @param[in] urlenp
 * @param[in] data
 * @param[in] startpos
 * @param[in] endpos
 * @param[in] last_char Should contain -1 if the reason this function is called is because
 *                      the end of the current data chunk is reached.
 */
static void htp_urlenp_add_field_piece(htp_urlenp_t *urlenp, const unsigned char *data, size_t startpos, size_t endpos, int last_char) {
    const size_t piece_len = endpos - startpos;
    const int have_piece = (data != NULL) && (piece_len > 0);

    // The field has not ended (no terminating character seen and more input
    // may follow): keep a copy of the available data and wait.
    if ((last_char == -1) && !(urlenp->_complete)) {
        if (have_piece) {
            bstr_builder_append_mem(urlenp->_bb, data + startpos, piece_len);
        }

        return;
    }

    // The field is finished. Build its value; it stays NULL when there is
    // no buffered data and no current piece.
    bstr *field = NULL;

    if (bstr_builder_size(urlenp->_bb) > 0) {
        // Earlier pieces are waiting in the string builder; add the current
        // one and assemble everything into a single string.
        if (have_piece) {
            bstr_builder_append_mem(urlenp->_bb, data + startpos, piece_len);
        }

        field = bstr_builder_to_str(urlenp->_bb);
        if (field == NULL) return;

        bstr_builder_clear(urlenp->_bb);
    } else if (have_piece) {
        // The current piece is the whole field.
        field = bstr_dup_mem(data + startpos, piece_len);
        if (field == NULL) return;
    }

    if (urlenp->_state != HTP_URLENP_STATE_KEY) {
        // Value, paired with the key remembered earlier. A missing key or
        // value is replaced with an empty string.
        bstr *name = urlenp->_name;
        urlenp->_name = NULL;

        if (name == NULL) {
            name = bstr_dup_c("");
            if (name == NULL) {
                bstr_free(field);
                return;
            }
        }

        bstr *value = field;
        if (value == NULL) {
            value = bstr_dup_c("");
            if (value == NULL) {
                bstr_free(name);
                return;
            }
        }

        if (urlenp->decode_url_encoding) {
            htp_tx_urldecode_params_inplace(urlenp->tx, name);
            htp_tx_urldecode_params_inplace(urlenp->tx, value);
        }

        htp_table_addn(urlenp->params, name, value);

        #ifdef HTP_DEBUG
        fprint_raw_data(stderr, "NAME", bstr_ptr(name), bstr_len(name));
        fprint_raw_data(stderr, "VALUE", bstr_ptr(value), bstr_len(value));
        #endif

        return;
    }

    // Key.
    const int at_separator = (last_char == urlenp->argument_separator);

    if (!(urlenp->_complete) && !at_separator) {
        // A value may still follow this key, so just remember it.
        urlenp->_name = field;
        return;
    }

    // The key stands alone. An entirely empty input must not produce a pair,
    // but an empty key terminated by a separator does
    // (e.g., /index.php?&q=2).
    if ((field == NULL) && !at_separator) {
        return;
    }

    // Add one pair with an empty value and a possibly empty key.
    bstr *name = field;
    if (name == NULL) {
        name = bstr_dup_c("");
        if (name == NULL) return;
    }

    bstr *value = bstr_dup_c("");
    if (value == NULL) {
        bstr_free(name);
        return;
    }

    if (urlenp->decode_url_encoding) {
        htp_tx_urldecode_params_inplace(urlenp->tx, name);
    }

    htp_table_addn(urlenp->params, name, value);
    urlenp->_name = NULL;

    #ifdef HTP_DEBUG
    fprint_raw_data(stderr, "NAME", bstr_ptr(name), bstr_len(name));
    fprint_raw_data(stderr, "VALUE", bstr_ptr(value), bstr_len(value));
    #endif
}
/**
 * Processes set-aside data.
 *
 * The pieces stored in mpartp->boundary_pieces are handed to
 * mpartp->handle_data and the store is then cleared. How they are handed over
 * depends on the parser mode and on whether a boundary was matched:
 *
 *  - data mode, no match: a set-aside CR and every stored piece are
 *    processed as data;
 *  - line mode, no match: the first piece is split at mpartp->boundarypos,
 *    the first part being processed as a line and the remainder (and all
 *    further pieces) as data;
 *  - match: only the first piece, up to mpartp->boundarypos and without its
 *    trailing LF or CRLF, is processed; everything else is treated as part
 *    of the boundary and not passed on.
 *
 * @param mpartp
 * @param matched Non-zero if a boundary was matched.
 * @return Always 1.
 */
static int htp_martp_process_aside(htp_mpartp_t *mpartp, int matched) {
    if (!matched && (mpartp->current_mode != MULTIPART_MODE_LINE)) {
        // Data mode and no match: nothing needs to be split.

        // A lone CR byte that was set aside is ordinary data here.
        if (mpartp->cr_aside) {
            mpartp->handle_data(mpartp, (unsigned char *) &"\r", 1, 0 /* Not end of line */);
            mpartp->cr_aside = 0;
        }

        // So is everything that was stored.
        if (bstr_builder_size(mpartp->boundary_pieces) > 0) {
            bstr *piece = NULL;

            list_iterator_reset(mpartp->boundary_pieces->pieces);
            while ((piece = list_iterator_next(mpartp->boundary_pieces->pieces)) != NULL) {
                mpartp->handle_data(mpartp, (unsigned char *) bstr_ptr(piece), bstr_len(piece), 0);
            }

            bstr_builder_clear(mpartp->boundary_pieces);
        }

        return 1;
    }

    // Line mode or boundary match.

    // Lone CR bytes are ignored in this case.
    mpartp->cr_aside = 0;

    // Without stored pieces there is nothing to process.
    if (bstr_builder_size(mpartp->boundary_pieces) == 0) {
        return 1;
    }

    bstr *piece = NULL;
    int on_first_piece = 1;

    list_iterator_reset(mpartp->boundary_pieces->pieces);
    while ((piece = list_iterator_next(mpartp->boundary_pieces->pieces)) != NULL) {
        unsigned char *bytes = (unsigned char *) bstr_ptr(piece);

        if (!on_first_piece) {
            // Later pieces are data when there was no match, and belong to
            // the boundary (so are not sent) when there was one.
            if (!matched) {
                mpartp->handle_data(mpartp, bytes, bstr_len(piece), 0);
            }

            continue;
        }

        on_first_piece = 0;

        if (matched) {
            // The line ending in front of the boundary belongs to the
            // boundary: drop a trailing LF, and a CR before it if present.
            size_t keep = mpartp->boundarypos;

            if ((keep > 0) && (bytes[keep - 1] == LF)) {
                keep--;

                if ((keep > 0) && (bytes[keep - 1] == CR)) {
                    keep--;
                }
            }

            mpartp->handle_data(mpartp, bytes, keep, 0);

            // The rest of this piece is part of the boundary.
            continue;
        }

        // No match, line mode: the first part of the piece is a line (line
        // endings are fine here), and the remainder is data.
        mpartp->handle_data(mpartp, bytes, mpartp->boundarypos, 1);
        mpartp->handle_data(mpartp, bytes + mpartp->boundarypos,
            bstr_len(piece) - mpartp->boundarypos, 0);
    }

    bstr_builder_clear(mpartp->boundary_pieces);

    return 1;
}
/**
 * Handles part data.
 *
 * In line mode the data is treated as (a piece of) a part header line:
 * incomplete and folded lines are stored in the parser's part_pieces builder,
 * complete lines are passed to the header parser, and an empty line switches
 * the parser to data mode and decides whether the part is a file or a text
 * part. For a file part, a temporary file is created when file extraction is
 * enabled and the extraction limit has not been reached. In data mode, text is
 * stored in part_pieces, while file data is passed to the file data hook and
 * written to the temporary file, if there is one.
 *
 * @param part
 * @param data
 * @param len
 * @param is_line Non-zero if the data ends a line.
 * @return 1 on success; -1 if the temporary file name does not fit in the
 *         name buffer, on memory allocation failure, if the temporary file
 *         cannot be created, or if writing to it fails.
 */
int htp_mpart_part_handle_data(htp_mpart_part_t *part, unsigned char *data, size_t len, int is_line) {
    // TODO We don't actually need the is_line parameter, because we can
    //      discover that ourselves by looking at the last byte in the buffer.

    // Keep track of part length
    part->len += len;

    // We currently do not process the preamble and epilogue parts
    if ((part->type == MULTIPART_PART_PREAMBLE) || (part->type == MULTIPART_PART_EPILOGUE)) return 1;

    if (part->mpartp->current_mode == MULTIPART_MODE_LINE) {
        // Line mode

        // TODO Remove the extra characters from folded lines

        if (is_line) {
            // End of line

            // Ignore the line ending
            if (len > 1) {
                if (data[len - 1] == LF) len--;
                if (data[len - 1] == CR) len--;
            } else if (len > 0) {
                if (data[len - 1] == LF) len--;
            }

            // Is it an empty line?
            if ((len == 0) && ((bstr_builder_size(part->mpartp->part_pieces) == 0))) {
                // Empty line; switch to data mode
                part->mpartp->current_mode = MULTIPART_MODE_DATA;
                htp_mpart_part_process_headers(part);
                // TODO RC

                if (part->file != NULL) {
                    part->type = MULTIPART_PART_FILE;

                    if ((part->mpartp->extract_files) && (part->mpartp->file_count < part->mpartp->extract_limit)) {
                        static const char tmpl_suffix[] = "/libhtp-multipart-file-XXXXXX";
                        char buf[255];
                        const char *dir = part->mpartp->extract_dir;
                        size_t dir_len = strlen(dir);

                        // The directory and the complete template suffix
                        // (with its terminating NUL) must fit in the buffer.
                        // A truncated name would not be NUL-terminated, and
                        // would not end in the "XXXXXX" that mkstemp()
                        // requires.
                        if (dir_len + sizeof (tmpl_suffix) > sizeof (buf)) return -1;

                        memcpy(buf, dir, dir_len);
                        // sizeof includes the NUL terminator of the suffix.
                        memcpy(buf + dir_len, tmpl_suffix, sizeof (tmpl_suffix));

                        part->file->tmpname = strdup(buf);
                        if (part->file->tmpname == NULL) return -1;

                        // mkstemp() replaces the trailing XXXXXX in place.
                        part->file->fd = mkstemp(part->file->tmpname);
                        if (part->file->fd < 0) return -1;

                        part->mpartp->file_count++;
                    }
                } else {
                    part->type = MULTIPART_PART_TEXT;
                }
            } else {
                // Not an empty line

                // Is there a folded line coming after this one?
                if ((part->mpartp->first_boundary_byte != ' ') && (part->mpartp->first_boundary_byte != '\t')) {
                    // No folded lines after this one, so process header

                    // Do we have more than one piece?
                    if (bstr_builder_size(part->mpartp->part_pieces) > 0) {
                        // Line in pieces
                        bstr_builder_append_mem(part->mpartp->part_pieces, (char *) data, len);

                        bstr *line = bstr_builder_to_str(part->mpartp->part_pieces);
                        if (line == NULL) return -1;

                        htp_mpartp_parse_header(part, (unsigned char *) bstr_ptr(line), bstr_len(line));
                        // TODO RC

                        bstr_free(&line);
                        bstr_builder_clear(part->mpartp->part_pieces);
                    } else {
                        // Just this line
                        htp_mpartp_parse_header(part, data, len);
                        // TODO RC
                    }

                    part->mpartp->pieces_form_line = 0;
                } else {
                    // Folded line, just store this piece for later
                    bstr_builder_append_mem(part->mpartp->part_pieces, (char *) data, len);
                    part->mpartp->pieces_form_line = 1;
                }
            }
        } else {
            // Not end of line; keep the data chunk for later
            bstr_builder_append_mem(part->mpartp->part_pieces, (char *) data, len);
            part->mpartp->pieces_form_line = 0;
        }
    } else {
        // Data mode; keep the data chunk for later (but not if it is a file)
        switch (part->type) {
            case MULTIPART_PART_TEXT:
                bstr_builder_append_mem(part->mpartp->part_pieces, (char *) data, len);
                break;

            case MULTIPART_PART_FILE:
                htp_mpartp_run_request_file_data_hook(part, data, len);

                // Store data to disk
                if (part->file->fd != -1) {
                    // Report write errors instead of silently losing data.
                    // NOTE(review): a short (partial) write is still treated
                    // as success here.
                    if (write(part->file->fd, data, len) < 0) return -1;
                }
                break;

            default:
                // Other part types carry no data to keep.
                break;
        }
    }

    return 1;
}