/*
 *----------------------------------------------------------------------------
 * OvsConntrackCreateTcpEntry
 *     Allocate a TCP connection-tracking entry and seed its sequence/window
 *     state from the first segment seen for the connection.
 *
 *     tcp - TCP header of the first segment.
 *     nbl - packet the header belongs to; only used to obtain the TCP
 *           payload length.
 *     now - current time, handed to OvsConntrackUpdateExpiration().
 *
 *     Returns a pointer to the generic entry embedded in the new TCP entry,
 *     or NULL when the allocation fails. The memory comes from
 *     OvsAllocateMemoryWithTag(..., OVS_CT_POOL_TAG); releasing it is the
 *     caller's responsibility.
 *----------------------------------------------------------------------------
 */
OVS_CT_ENTRY *
OvsConntrackCreateTcpEntry(const TCPHdr *tcp,
                           PNET_BUFFER_LIST nbl,
                           UINT64 now)
{
    struct conn_tcp* newconn;
    struct tcp_peer *src, *dst;

    newconn = OvsAllocateMemoryWithTag(sizeof(struct conn_tcp),
                                       OVS_CT_POOL_TAG);
    if (!newconn) {
        return NULL;
    }

    /*
     * Zero the whole entry, not just the embedded generic part. Several
     * peer fields are not assigned below on every path (dst->wscale and
     * dst->seqlo for a SYN, for instance) yet are read by
     * OvsConntrackUpdateTcpEntry() on the next packet.
     */
    *newconn = (struct conn_tcp) {0};

    src = &newconn->peer[0];
    dst = &newconn->peer[1];

    src->seqlo = ntohl(tcp->seq);
    src->seqhi = src->seqlo + OvsGetTcpPayloadLength(nbl) + 1;

    /*
     * NOTE(review): OvsConntrackUpdateTcpEntry() tests ntohs(tcp->flags)
     * while this function tests tcp->flags directly. Confirm against the
     * TCPHdr definition which of the two matches the TCP_* masks.
     */
    if (tcp->flags & TCP_SYN) {
        /* SYN consumes one sequence number. */
        src->seqhi++;
        src->wscale = OvsTcpGetWscale(tcp);
    } else {
        /* Picked up mid-stream: neither side's window scale was seen. */
        src->wscale = CT_WSCALE_UNKNOWN;
        dst->wscale = CT_WSCALE_UNKNOWN;
    }

    src->max_win = MAX(ntohs(tcp->window), 1);
    if (src->wscale & CT_WSCALE_MASK) {
        /* Remove scale factor from initial window */
        uint8_t sws = src->wscale & CT_WSCALE_MASK;
        src->max_win = DIV_ROUND_UP((uint32_t) src->max_win, 1 << sws);
    }

    if (tcp->flags & TCP_FIN) {
        /* FIN consumes one sequence number. */
        src->seqhi++;
    }

    /*
     * Nothing is known about the receiving side yet; seqhi == 1 is the
     * value OvsConntrackUpdateTcpEntry() checks for before sliding the
     * window on that peer's first packet.
     */
    dst->seqhi = 1;
    dst->max_win = 1;
    src->state = CT_DPIF_TCPS_SYN_SENT;
    dst->state = CT_DPIF_TCPS_CLOSED;

    OvsConntrackUpdateExpiration(newconn, now, CT_ENTRY_TIMEOUT);

    return &newconn->up;
}
/*
 *----------------------------------------------------------------------------
 * OvsConntrackUpdateTcpEntry
 *     Run one TCP segment through the per-connection sequence/window
 *     tracker and advance the state of both peers.
 *
 *     conn_ - generic conntrack entry; converted to the TCP-specific entry
 *             with OvsCastConntrackEntryToTcpEntry().
 *     tcp   - TCP header of the segment.
 *     nbl   - packet the header belongs to; only used to obtain the TCP
 *             payload length.
 *     reply - FALSE when the segment was sent by peer[0], TRUE when it was
 *             sent by peer[1].
 *     now   - current time, handed to OvsConntrackUpdateExpiration().
 *
 *     Returns:
 *       CT_UPDATE_INVALID - OvsCtInvalidTcpFlags() rejected the flags, or
 *                           the segment fits neither the strict nor the
 *                           relaxed window check below.
 *       CT_UPDATE_NEW     - a SYN without ACK arrived while both peers were
 *                           at or past FIN_WAIT_2; both peer states are
 *                           reset to CLOSED before returning.
 *       CT_UPDATE_VALID   - the segment was accepted. The expiration is
 *                           refreshed only when the strict check passed.
 *----------------------------------------------------------------------------
 */
enum CT_UPDATE_RES
OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
                           const TCPHdr *tcp,
                           PNET_BUFFER_LIST nbl,
                           BOOLEAN reply,
                           UINT64 now)
{
    struct conn_tcp *conn = OvsCastConntrackEntryToTcpEntry(conn_);
    /* The peer that sent this segment */
    struct tcp_peer *src = &conn->peer[reply ? 1 : 0];
    /* The peer that should receive this segment */
    struct tcp_peer *dst = &conn->peer[reply ? 0 : 1];
    /* Window-scale shift counts for the sender and the receiver */
    uint8_t sws = 0, dws = 0;
    UINT16 tcp_flags = ntohs(tcp->flags);
    uint16_t win = ntohs(tcp->window);
    uint32_t ack, end, seq, orig_seq;
    uint32_t p_len = OvsGetTcpPayloadLength(nbl);
    int ackskew;

    if (OvsCtInvalidTcpFlags(tcp_flags)) {
        return CT_UPDATE_INVALID;
    }

    /*
     * A bare SYN on a connection whose two sides are both at or past
     * FIN_WAIT_2: reset the peer states and report it as a new connection.
     */
    if (((tcp_flags & (TCP_SYN|TCP_ACK)) == TCP_SYN)
        && dst->state >= CT_DPIF_TCPS_FIN_WAIT_2
        && src->state >= CT_DPIF_TCPS_FIN_WAIT_2) {
        src->state = dst->state = CT_DPIF_TCPS_CLOSED;
        return CT_UPDATE_NEW;
    }

    /*
     * Pick the shift counts for this segment. They stay 0 for any segment
     * carrying SYN. Otherwise the recorded scales are used when both peers
     * have CT_WSCALE_FLAG set, and TCP_MAX_WSCALE is used when both are
     * marked CT_WSCALE_UNKNOWN.
     */
    if (src->wscale & CT_WSCALE_FLAG
        && dst->wscale & CT_WSCALE_FLAG
        && !(tcp_flags & TCP_SYN)) {
        sws = src->wscale & CT_WSCALE_MASK;
        dws = dst->wscale & CT_WSCALE_MASK;
    } else if (src->wscale & CT_WSCALE_UNKNOWN
               && dst->wscale & CT_WSCALE_UNKNOWN
               && !(tcp_flags & TCP_SYN)) {
        sws = TCP_MAX_WSCALE;
        dws = TCP_MAX_WSCALE;
    }

    /*
     * Sequence tracking algorithm from Guido van Rooij's paper:
     *   http://www.madison-gurkha.com/publications/tcp_filtering/
     *      tcp_filtering.ps
     */

    /* orig_seq keeps the on-wire value; seq may be rewritten further down. */
    orig_seq = seq = ntohl(tcp->seq);
    if (src->state < CT_DPIF_TCPS_SYN_SENT) {
        /* First packet from this end. Set its state */

        ack = ntohl(tcp->ack_seq);

        /* 'end' is the sequence number just past this segment. */
        end = seq + p_len;
        if (tcp_flags & TCP_SYN) {
            end++;
            if (dst->wscale & CT_WSCALE_FLAG) {
                src->wscale = OvsTcpGetWscale(tcp);
                if (src->wscale & CT_WSCALE_FLAG) {
                    /* Remove scale factor from initial window */
                    sws = src->wscale & CT_WSCALE_MASK;
                    win = DIV_ROUND_UP((uint32_t) win, 1 << sws);
                    dws = dst->wscale & CT_WSCALE_MASK;
                } else {
                    /* fixup other window */
                    dst->max_win <<= dst->wscale & CT_WSCALE_MASK;
                    /* in case of a retrans SYN|ACK */
                    dst->wscale = 0;
                }
            }
        }
        if (tcp_flags & TCP_FIN) {
            end++;
        }

        src->seqlo = seq;
        src->state = CT_DPIF_TCPS_SYN_SENT;
        /*
         * May need to slide the window (seqhi may have been set by
         * the crappy stack check or if we picked up the connection
         * after establishment)
         */
        if (src->seqhi == 1
            || SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) {
            src->seqhi = end + MAX(1, dst->max_win << dws);
        }
        if (win > src->max_win) {
            src->max_win = win;
        }

    } else {
        /* This end has been seen before: only compute ack and end. */
        ack = ntohl(tcp->ack_seq);
        end = seq + p_len;
        if (tcp_flags & TCP_SYN) {
            end++;
        }
        if (tcp_flags & TCP_FIN) {
            end++;
        }
    }

    if ((tcp_flags & TCP_ACK) == 0) {
        /* Let it pass through the ack skew check */
        ack = dst->seqlo;
    } else if ((ack == 0
                && (tcp_flags & (TCP_ACK|TCP_RST)) == (TCP_ACK|TCP_RST))
               /* broken tcp stacks do not set ack */) {
        /* Many stacks (ours included) will set the ACK number in an
         * FIN|ACK if the SYN times out -- no sequence to ACK.
         *
         * NOTE(review): the condition above tests ACK|RST, not FIN|ACK;
         * the wording of this inherited comment should be confirmed. */
        ack = dst->seqlo;
    }

    if (seq == end) {
        /* Ease sequencing restrictions on no data packets */
        seq = src->seqlo;
        end = seq;
    }

    /* Signed distance between what the receiver has sent and what is acked */
    ackskew = dst->seqlo - ack;
#define MAXACKWINDOW (0xffff + 1500)    /* 1500 is an arbitrary fudge factor */
    if (SEQ_GEQ(src->seqhi, end)
        /* Last octet inside other's window space */
        && SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))
        /* Retrans: not more than one window back */
        && (ackskew >= -MAXACKWINDOW)
        /* Acking not more than one reassembled fragment backwards */
        && (ackskew <= (MAXACKWINDOW << sws))
        /* Acking not more than one window forward */
        && ((tcp_flags & TCP_RST) == 0 || orig_seq == src->seqlo
            || (orig_seq == src->seqlo + 1)
            || (orig_seq + 1 == src->seqlo))) {
        /* Require an exact/+1 sequence match on resets when possible */

        /* update max window */
        if (src->max_win < win) {
            src->max_win = win;
        }
        /* synchronize sequencing */
        if (SEQ_GT(end, src->seqlo)) {
            src->seqlo = end;
        }
        /* slide the window of what the other end can send */
        if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) {
            dst->seqhi = ack + MAX((win << sws), 1);
        }

        /* update states */
        if (tcp_flags & TCP_SYN && src->state < CT_DPIF_TCPS_SYN_SENT) {
            src->state = CT_DPIF_TCPS_SYN_SENT;
        }
        if (tcp_flags & TCP_FIN && src->state < CT_DPIF_TCPS_CLOSING) {
            src->state = CT_DPIF_TCPS_CLOSING;
        }
        if (tcp_flags & TCP_ACK) {
            /* An ACK moves the *receiver* of this segment forward. */
            if (dst->state == CT_DPIF_TCPS_SYN_SENT) {
                dst->state = CT_DPIF_TCPS_ESTABLISHED;
            } else if (dst->state == CT_DPIF_TCPS_CLOSING) {
                dst->state = CT_DPIF_TCPS_FIN_WAIT_2;
            }
        }
        if (tcp_flags & TCP_RST) {
            src->state = dst->state = CT_DPIF_TCPS_TIME_WAIT;
        }

        /*
         * Refresh the expiration according to the combined peer states.
         * Timeouts are expressed as multiples of CT_INTERVAL_SEC; the
         * branches are ordered from "both sides finished" down to "fully
         * established".
         */
        if (src->state >= CT_DPIF_TCPS_FIN_WAIT_2
            && dst->state >= CT_DPIF_TCPS_FIN_WAIT_2) {
            OvsConntrackUpdateExpiration(conn, now, 30 * CT_INTERVAL_SEC);
        } else if (src->state >= CT_DPIF_TCPS_CLOSING
                   && dst->state >= CT_DPIF_TCPS_CLOSING) {
            OvsConntrackUpdateExpiration(conn, now, 45 * CT_INTERVAL_SEC);
        } else if (src->state < CT_DPIF_TCPS_ESTABLISHED
                   || dst->state < CT_DPIF_TCPS_ESTABLISHED) {
            OvsConntrackUpdateExpiration(conn, now, 30 * CT_INTERVAL_SEC);
        } else if (src->state >= CT_DPIF_TCPS_CLOSING
                   || dst->state >= CT_DPIF_TCPS_CLOSING) {
            OvsConntrackUpdateExpiration(conn, now, 15 * 60 * CT_INTERVAL_SEC);
        } else {
            OvsConntrackUpdateExpiration(conn, now, 24 * 60 * 60 * CT_INTERVAL_SEC);
        }
    } else if ((dst->state < CT_DPIF_TCPS_SYN_SENT
                || dst->state >= CT_DPIF_TCPS_FIN_WAIT_2
                || src->state >= CT_DPIF_TCPS_FIN_WAIT_2)
               && SEQ_GEQ(src->seqhi + MAXACKWINDOW, end)
               /* Within a window forward of the originating packet */
               && SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
        /* Within a window backward of the originating packet */

        /*
         * This currently handles three situations:
         *  1) Stupid stacks will shotgun SYNs before their peer
         *     replies.
         *  2) When PF catches an already established stream (the
         *     firewall rebooted, the state table was flushed, routes
         *     changed...)
         *  3) Packets get funky immediately after the connection
         *     closes (this should catch Solaris spurious ACK|FINs
         *     that web servers like to spew after a close)
         *
         * This must be a little more careful than the above code
         * since packet floods will also be caught here. We don't
         * update the TTL here to mitigate the damage of a packet
         * flood and so the same code can handle awkward establishment
         * and a loosened connection close.
         * In the establishment case, a correct peer response will
         * validate the connection, go through the normal state code
         * and keep updating the state TTL.
         */

        /* update max window */
        if (src->max_win < win) {
            src->max_win = win;
        }
        /* synchronize sequencing */
        if (SEQ_GT(end, src->seqlo)) {
            src->seqlo = end;
        }
        /* slide the window of what the other end can send */
        if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) {
            dst->seqhi = ack + MAX((win << sws), 1);
        }

        /*
         * Cannot set dst->seqhi here since this could be a shotgunned
         * SYN and not an already established connection.
         *
         * NOTE(review): the statement just above does assign dst->seqhi;
         * this inherited comment and the code appear to disagree.
         */

        if (tcp_flags & TCP_FIN && src->state < CT_DPIF_TCPS_CLOSING) {
            src->state = CT_DPIF_TCPS_CLOSING;
        }

        if (tcp_flags & TCP_RST) {
            src->state = dst->state = CT_DPIF_TCPS_TIME_WAIT;
        }
    } else {
        /* Outside both the strict and the relaxed window. */
        return CT_UPDATE_INVALID;
    }

    return CT_UPDATE_VALID;
}
/* *---------------------------------------------------------------------------- * OvsCtHandleFtp * Extract the FTP control data from the packet and created a related * entry if it's a valid connection. This method doesn't support extended * FTP yet. Supports PORT and PASV commands. * Eg: * 'PORT 192,168,137,103,192,22\r\n' -> '192.168.137.103' and 49174 * '227 Entering Passive Mode (192,168,137,104,194,14)\r\n' gets extracted * to '192.168.137.104' and 49678 *---------------------------------------------------------------------------- */ NDIS_STATUS OvsCtHandleFtp(PNET_BUFFER_LIST curNbl, OvsFlowKey *key, OVS_PACKET_HDR_INFO *layers, UINT64 currentTime, POVS_CT_ENTRY entry, BOOLEAN request) { NDIS_STATUS status; FTP_TYPE ftpType = 0; const char *buf; char temp[256] = { 0 }; char ftpMsg[256] = { 0 }; TCPHdr tcpStorage; const TCPHdr *tcp; tcp = OvsGetTcp(curNbl, layers->l4Offset, &tcpStorage); if (!tcp) { return NDIS_STATUS_INVALID_PACKET; } UINT32 len = OvsGetTcpPayloadLength(curNbl); if (len > sizeof(temp)) { /* We only care up to 256 */ len = sizeof(temp); } buf = OvsGetPacketBytes(curNbl, len, layers->l4Offset + TCP_HDR_LEN(tcp), temp); if (buf == NULL) { return NDIS_STATUS_INVALID_PACKET; } OvsStrlcpy((char *)ftpMsg, (char *)buf, min(len, sizeof(ftpMsg))); char *req = NULL; if (request) { if ((len >= 5) && (OvsStrncmp("PORT", ftpMsg, 4) == 0)) { ftpType = FTP_TYPE_ACTIVE; req = ftpMsg + 4; } } else { if ((len >= 4) && (OvsStrncmp(FTP_PASV_RSP_PREFIX, ftpMsg, 3) == 0)) { ftpType = FTP_TYPE_PASV; /* There are various formats for PASV command. We try to support * some of them. This has been addressed by RFC 2428 - EPSV. * Eg: * 227 Entering Passive Mode (h1,h2,h3,h4,p1,p2). * 227 Entering Passive Mode (h1,h2,h3,h4,p1,p2 * 227 Entering Passive Mode. 
h1,h2,h3,h4,p1,p2 * 227 =h1,h2,h3,h4,p1,p2 */ char *paren; paren = strchr(ftpMsg, '('); if (paren) { req = paren + 1; } else { /* PASV command without ( */ req = ftpMsg + 3; } } } if (req == NULL) { /* Not a PORT/PASV control packet */ return NDIS_STATUS_SUCCESS; } UINT32 arr[6] = {0}; status = OvsCtExtractNumbers(req, len, arr, 6, ','); if (status != NDIS_STATUS_SUCCESS) { return status; } UINT32 ip = ntohl((arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]); UINT16 port = ntohs(((arr[4] << 8) | arr[5])); switch (ftpType) { case FTP_TYPE_PASV: /* Ensure that the command states Server's IP address */ ASSERT(ip == key->ipKey.nwSrc); OvsCtRelatedEntryCreate(key->ipKey.nwProto, key->l2.dlType, /* Server's IP */ ip, /* Use intended client's IP */ key->ipKey.nwDst, /* Dynamic port opened on server */ port, /* We don't know the client port */ 0, currentTime, entry); break; case FTP_TYPE_ACTIVE: OvsCtRelatedEntryCreate(key->ipKey.nwProto, key->l2.dlType, /* Server's default IP address */ key->ipKey.nwDst, /* Client's IP address */ ip, /* FTP Data Port is 20 */ ntohs(IPPORT_FTP_DATA), /* Port opened up on Client */ port, currentTime, entry); break; default: OVS_LOG_ERROR("invalid ftp type:%d", ftpType); status = NDIS_STATUS_INVALID_PARAMETER; break; } return status; }