boost::tuple<int, int> http_parser::incoming(buffer::const_interval recv_buffer) { assert(recv_buffer.left() >= m_recv_buffer.left()); boost::tuple<int, int> ret(0, 0); // early exit if there's nothing new in the receive buffer if (recv_buffer.left() == m_recv_buffer.left()) return ret; m_recv_buffer = recv_buffer; char const* pos = recv_buffer.begin + m_recv_pos; if (m_state == read_status) { assert(!m_finished); char const* newline = std::find(pos, recv_buffer.end, '\n'); // if we don't have a full line yet, wait. if (newline == recv_buffer.end) return ret; if (newline == pos) throw std::runtime_error("unexpected newline in HTTP response"); char const* line_end = newline; if (pos != line_end && *(line_end - 1) == '\r') --line_end; std::istringstream line(std::string(pos, line_end)); ++newline; int incoming = (int)std::distance(pos, newline); m_recv_pos += incoming; boost::get<1>(ret) += incoming; pos = newline; line >> m_protocol; if (m_protocol.substr(0, 5) != "HTTP/") { throw std::runtime_error("unknown protocol in HTTP response: " + m_protocol + " line: " + std::string(pos, newline)); } line >> m_status_code; std::getline(line, m_server_message); m_state = read_header; }
boost::tuple<int, int> http_parser::incoming( buffer::const_interval recv_buffer, bool& error) { TORRENT_ASSERT(recv_buffer.left() >= m_recv_buffer.left()); boost::tuple<int, int> ret(0, 0); int start_pos = m_recv_buffer.left(); // early exit if there's nothing new in the receive buffer if (start_pos == recv_buffer.left()) return ret; m_recv_buffer = recv_buffer; if (m_state == error_state) { error = true; return ret; } char const* pos = recv_buffer.begin + m_recv_pos; restart_response: if (m_state == read_status) { TORRENT_ASSERT(!m_finished); char const* newline = std::find(pos, recv_buffer.end, '\n'); // if we don't have a full line yet, wait. if (newline == recv_buffer.end) { boost::get<1>(ret) += m_recv_buffer.left() - start_pos; return ret; } if (newline == pos) { m_state = error_state; error = true; return ret; } char const* line_end = newline; if (pos != line_end && *(line_end - 1) == '\r') --line_end; char const* line = pos; ++newline; int incoming = int(newline - pos); m_recv_pos += incoming; boost::get<1>(ret) += newline - (m_recv_buffer.begin + start_pos); pos = newline; m_protocol = read_until(line, ' ', line_end); if (m_protocol.substr(0, 5) == "HTTP/") { m_status_code = atoi(read_until(line, ' ', line_end).c_str()); m_server_message = read_until(line, '\r', line_end); } else { m_method = m_protocol; std::transform(m_method.begin(), m_method.end(), m_method.begin(), &to_lower); // the content length is assumed to be 0 for requests m_content_length = 0; m_protocol.clear(); m_path = read_until(line, ' ', line_end); m_protocol = read_until(line, ' ', line_end); m_status_code = 0; } m_state = read_header; start_pos = pos - recv_buffer.begin; } if (m_state == read_header) { TORRENT_ASSERT(!m_finished); char const* newline = std::find(pos, recv_buffer.end, '\n'); std::string line; while (newline != recv_buffer.end && m_state == read_header) { // if the LF character is preceeded by a CR // charachter, don't copy it into the line string. char const* line_end = newline; if (pos != line_end && *(line_end - 1) == '\r') --line_end; line.assign(pos, line_end); ++newline; m_recv_pos += newline - pos; pos = newline; std::string::size_type separator = line.find(':'); if (separator == std::string::npos) { if (m_status_code == 100) { // for 100 Continue, we need to read another response header // before reading the body m_state = read_status; goto restart_response; } // this means we got a blank line, // the header is finished and the body // starts. m_state = read_body; // if this is a request (not a response) // we're done once we reach the end of the headers // if (!m_method.empty()) m_finished = true; // the HTTP header should always be < 2 GB TORRENT_ASSERT(m_recv_pos < INT_MAX); m_body_start_pos = int(m_recv_pos); break; } std::string name = line.substr(0, separator); std::transform(name.begin(), name.end(), name.begin(), &to_lower); ++separator; // skip whitespace while (separator < line.size() && (line[separator] == ' ' || line[separator] == '\t')) ++separator; std::string value = line.substr(separator, std::string::npos); m_header.insert(std::make_pair(name, value)); if (name == "content-length") { m_content_length = strtoll(value.c_str(), 0, 10); } else if (name == "content-range") { bool success = true; char const* ptr = value.c_str(); // apparently some web servers do not send the "bytes" // in their content-range. Don't treat it as an error // if we can't find it, just assume the byte counters // start immediately if (string_begins_no_case("bytes ", ptr)) ptr += 6; char* end; m_range_start = strtoll(ptr, &end, 10); if (end == ptr) success = false; else if (*end != '-') success = false; else { ptr = end + 1; m_range_end = strtoll(ptr, &end, 10); if (end == ptr) success = false; } if (!success || m_range_end < m_range_start) { m_state = error_state; error = true; return ret; } // the http range is inclusive m_content_length = m_range_end - m_range_start + 1; } else if (name == "transfer-encoding") { m_chunked_encoding = string_begins_no_case("chunked", value.c_str()); } TORRENT_ASSERT(m_recv_pos <= recv_buffer.left()); newline = std::find(pos, recv_buffer.end, '\n'); } boost::get<1>(ret) += newline - (m_recv_buffer.begin + start_pos); } if (m_state == read_body) { int incoming = recv_buffer.end - pos; if (m_chunked_encoding && (m_flags & dont_parse_chunks) == 0) { if (m_cur_chunk_end == -1) m_cur_chunk_end = m_body_start_pos; while (m_cur_chunk_end <= m_recv_pos + incoming && !m_finished && incoming > 0) { size_type payload = m_cur_chunk_end - m_recv_pos; if (payload > 0) { TORRENT_ASSERT(payload < INT_MAX); m_recv_pos += payload; boost::get<0>(ret) += int(payload); incoming -= int(payload); } buffer::const_interval buf(recv_buffer.begin + m_cur_chunk_end, recv_buffer.end); size_type chunk_size; int header_size; if (parse_chunk_header(buf, &chunk_size, &header_size)) { if (chunk_size > 0) { std::pair<size_type, size_type> chunk_range(m_cur_chunk_end + header_size , m_cur_chunk_end + header_size + chunk_size); m_chunked_ranges.push_back(chunk_range); } m_cur_chunk_end += header_size + chunk_size; if (chunk_size == 0) { m_finished = true; TORRENT_ASSERT(m_content_length < 0 || m_recv_pos - m_body_start_pos - m_chunk_header_size == m_content_length); } header_size -= m_partial_chunk_header; m_partial_chunk_header = 0; // fprintf(stderr, "parse_chunk_header(%d, -> %d, -> %d) -> %d\n" // " incoming = %d\n m_recv_pos = %d\n m_cur_chunk_end = %d\n" // " content-length = %d\n" // , buf.left(), int(chunk_size), header_size, 1, incoming, int(m_recv_pos) // , m_cur_chunk_end, int(m_content_length)); } else { m_partial_chunk_header += incoming; header_size = incoming; // fprintf(stderr, "parse_chunk_header(%d, -> %d, -> %d) -> %d\n" // " incoming = %d\n m_recv_pos = %d\n m_cur_chunk_end = %d\n" // " content-length = %d\n" // , buf.left(), int(chunk_size), header_size, 0, incoming, int(m_recv_pos) // , m_cur_chunk_end, int(m_content_length)); } m_chunk_header_size += header_size; m_recv_pos += header_size; boost::get<1>(ret) += header_size; incoming -= header_size; } if (incoming > 0) { m_recv_pos += incoming; boost::get<0>(ret) += incoming; // incoming = 0; } } else { size_type payload_received = m_recv_pos - m_body_start_pos + incoming; if (payload_received > m_content_length && m_content_length >= 0) { TORRENT_ASSERT(m_content_length - m_recv_pos + m_body_start_pos < INT_MAX); incoming = int(m_content_length - m_recv_pos + m_body_start_pos); } TORRENT_ASSERT(incoming >= 0); m_recv_pos += incoming; boost::get<0>(ret) += incoming; } if (m_content_length >= 0 && !m_chunked_encoding && m_recv_pos - m_body_start_pos >= m_content_length) { m_finished = true; } } return ret; }