Example No. 1
/*
 * After a timeout, the SACK list may be rebuilt.  This SACK information
 * should be used to avoid retransmitting SACKed data.  This function
 * traverses the SACK list to see if snd_nxt should be moved forward.
 */
void
tcp_sack_adjust(struct tcpcb *tp)
{
    struct sackhole *p, *cur = TAILQ_FIRST(&tp->snd_holes);

//ScenSim-Port//    INP_WLOCK_ASSERT(tp->t_inpcb);
    if (cur == NULL)
        return; /* No holes */
    if (SEQ_GEQ(tp->snd_nxt, tp->snd_fack))
        return; /* We're already beyond any SACKed blocks */
    /*-
     * Two cases for which we want to advance snd_nxt:
     * i) snd_nxt lies between end of one hole and beginning of another
     * ii) snd_nxt lies between end of last hole and snd_fack
     */
    while ((p = TAILQ_NEXT(cur, scblink)) != NULL) {
        if (SEQ_LT(tp->snd_nxt, cur->end))
            return;
        if (SEQ_GEQ(tp->snd_nxt, p->start))
            cur = p;
        else {
            tp->snd_nxt = p->start;
            return;
        }
    }
    if (SEQ_LT(tp->snd_nxt, cur->end))
        return;
    tp->snd_nxt = tp->snd_fack;
}
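All of the examples in this collection lean on the BSD sequence-space comparison macros (SEQ_LT, SEQ_GT, and friends). Below is a minimal sketch of the classic signed-difference formulation from tcp_seq.h and why it survives 32-bit wraparound; the small driver is illustrative only.

/*
 * Sketch of the classic BSD sequence-space comparisons (tcp_seq.h).
 * The signed-difference trick keeps the ordering correct across 32-bit
 * wraparound, provided the two values are within 2^31 of each other.
 */
#include <stdint.h>
#include <stdio.h>

#define SEQ_LT(a, b)    ((int32_t)((a) - (b)) < 0)
#define SEQ_LEQ(a, b)   ((int32_t)((a) - (b)) <= 0)
#define SEQ_GT(a, b)    ((int32_t)((a) - (b)) > 0)
#define SEQ_GEQ(a, b)   ((int32_t)((a) - (b)) >= 0)

int main(void)
{
    uint32_t before_wrap = 0xFFFFFFF0u;    /* just below the wrap point */
    uint32_t after_wrap  = 0x00000010u;    /* just past it */

    /* A plain '<' gets this wrong; the modular comparison does not. */
    printf("SEQ_LT: %d, plain <: %d\n",
        SEQ_LT(before_wrap, after_wrap), before_wrap < after_wrap);
    return 0;
}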
Example No. 2
static void
dctcp_ack_received(struct cc_var *ccv, uint16_t type)
{
	struct dctcp *dctcp_data;
	int bytes_acked = 0;

	dctcp_data = ccv->cc_data;

	if (CCV(ccv, t_flags) & TF_ECN_PERMIT) {
		/*
		 * DCTCP doesn't treat receipt of ECN marked packet as a
		 * congestion event. Thus, DCTCP always executes the ACK
		 * processing out of congestion recovery.
		 */
		if (IN_CONGRECOVERY(CCV(ccv, t_flags))) {
			EXIT_CONGRECOVERY(CCV(ccv, t_flags));
			newreno_cc_algo.ack_received(ccv, type);
			ENTER_CONGRECOVERY(CCV(ccv, t_flags));
		} else
			newreno_cc_algo.ack_received(ccv, type);

		if (type == CC_DUPACK)
			bytes_acked = CCV(ccv, t_maxseg);

		if (type == CC_ACK)
			bytes_acked = ccv->bytes_this_ack;

		/* Update total bytes. */
		dctcp_data->bytes_total += bytes_acked;

		/* Update total marked bytes. */
		if (dctcp_data->ece_curr) {
			if (!dctcp_data->ece_prev
			    && bytes_acked > CCV(ccv, t_maxseg)) {
				dctcp_data->bytes_ecn +=
				    (bytes_acked - CCV(ccv, t_maxseg));
			} else
				dctcp_data->bytes_ecn += bytes_acked;
			dctcp_data->ece_prev = 1;
		} else {
			if (dctcp_data->ece_prev
			    && bytes_acked > CCV(ccv, t_maxseg))
				dctcp_data->bytes_ecn += CCV(ccv, t_maxseg);
			dctcp_data->ece_prev = 0;
		}
		dctcp_data->ece_curr = 0;

		/*
		 * Update the fraction of marked bytes at the end of
		 * current window size.
		 */
		if ((IN_FASTRECOVERY(CCV(ccv, t_flags)) &&
		    SEQ_GEQ(ccv->curack, CCV(ccv, snd_recover))) ||
		    (!IN_FASTRECOVERY(CCV(ccv, t_flags)) &&
		    SEQ_GT(ccv->curack, dctcp_data->save_sndnxt)))
			dctcp_update_alpha(ccv);
	} else
		newreno_cc_algo.ack_received(ccv, type);
}
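The bytes_ecn/bytes_total counters maintained above feed the fraction of marked bytes that dctcp_update_alpha() folds into alpha at the end of each window. A hedged, fixed-point sketch of that update follows, assuming the standard DCTCP recurrence alpha = (1 - g)*alpha + g*F with gain g = 1/16; the names and scaling are illustrative, not the exact FreeBSD implementation.

/*
 * Illustrative DCTCP alpha update: alpha <- (1 - g)*alpha + g*F, where
 * F = marked_bytes / total_bytes over the last window.  Fixed point with
 * DCTCP_SHIFT fractional bits and gain g = 2^-G_SHIFT.  Names and
 * scaling are assumptions for illustration only.
 */
#include <stdint.h>

#define DCTCP_SHIFT 10                     /* alpha scaled by 2^10 */
#define DCTCP_MAX   (1u << DCTCP_SHIFT)    /* represents alpha == 1.0 */
#define G_SHIFT     4                      /* g = 1/16 */

static uint32_t
dctcp_alpha_update(uint32_t alpha, uint64_t bytes_ecn, uint64_t bytes_total)
{
	uint32_t frac = 0;

	if (bytes_total > 0)
		frac = (uint32_t)((bytes_ecn << DCTCP_SHIFT) / bytes_total);
	if (frac > DCTCP_MAX)
		frac = DCTCP_MAX;

	/* alpha = alpha - alpha/16 + F/16 */
	alpha = alpha - (alpha >> G_SHIFT) + (frac >> G_SHIFT);
	if (alpha > DCTCP_MAX)
		alpha = DCTCP_MAX;
	return (alpha);
}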
Example No. 3
/*
 * To remove a SACK block.
 *
 * Parameters:
 *	sack_blk_t *head: pointer to the array of SACK blks.
 *	tcp_seq end: to remove all sack blk with seq num less than end.
 *	int32_t *num: (referenced) total num of SACK blks in the array.
 */
void
tcp_sack_remove(sack_blk_t *head, tcp_seq end, int32_t *num)
{
	sack_blk_t tmp[MAX_SACK_BLK];
	int32_t i, j, old_num, new_num;

	if (*num == 0)
		return;

	old_num = *num;
	new_num = old_num;
	j = 0;
	/* Walk thru the whole list and copy the new list to tmp[]. */
	for (i = 0; i < old_num; i++) {
		if (SEQ_GT(end, head[i].begin)) {
			/*
			 * Check to see if the old SACK blk needs to be
			 * removed or updated.  If the old blk is just
			 * partially covered, update begin and continue.
			 * If the old blk is completely covered, remove it
			 * and continue to check.
			 */
			if (SEQ_GEQ(end, head[i].end)) {
				new_num--;
				continue;
			} else {
				tmp[j].begin = end;
				tmp[j].end = head[i].end;
			}
		} else {
			tmp[j].begin = head[i].begin;
			tmp[j].end = head[i].end;
		}
		j++;
	}
	/* Copy tmp[] back to the original list. */
	for (i = 0; i < new_num; i++) {
		head[i].begin = tmp[i].begin;
		head[i].end = tmp[i].end;
	}
	*num = new_num;
}
Example No. 4
/**
 *  \brief Update stream with SACK records from a TCP packet.
 *
 *  \param stream The stream to update.
 *  \param p packet to get the SACK records from
 *
 *  \retval -1 error
 *  \retval 0 ok
 */
int StreamTcpSackUpdatePacket(TcpStream *stream, Packet *p) {
    int records = TCP_GET_SACK_CNT(p);
    int record = 0;

    TCPOptSackRecord *sack_rec = (TCPOptSackRecord *)(TCP_GET_SACK_PTR(p));

    for (record = 0; record < records; record++) {
        SCLogDebug("%p last_ack %u, left edge %u, right edge %u", sack_rec,
            stream->last_ack, ntohl(sack_rec->le), ntohl(sack_rec->re));

        if (SEQ_LEQ(ntohl(sack_rec->re), stream->last_ack)) {
            SCLogDebug("record before last_ack");
            goto next;
        }

        /** \todo need a metric to check for a right edge limit */
/*
        if (SEQ_GT(ntohl(sack_rec->re), stream->next_seq)) {
            SCLogDebug("record beyond next_seq %u", stream->next_seq);
            goto next;
        }
*/
        if (SEQ_GEQ(ntohl(sack_rec->le), ntohl(sack_rec->re))) {
            SCLogDebug("invalid record: le >= re");
            goto next;
        }

        if (StreamTcpSackInsertRange(stream, ntohl(sack_rec->le),
                    ntohl(sack_rec->re)) == -1)
        {
            SCReturnInt(-1);
        }

    next:
        sack_rec++;
    }
#ifdef DEBUG
    StreamTcpSackPrintList(stream);
#endif
    SCReturnInt(0);
}
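TCP_GET_SACK_CNT()/TCP_GET_SACK_PTR() hand this function SACK records already located inside the TCP options. A standalone sketch of that parsing step over a raw option buffer is shown below, with a hypothetical helper name and struct; the record layout (pairs of 32-bit left/right edges in network order) follows RFC 2018.

/*
 * Parse a raw TCP SACK option (kind 5) into host-order edges.  'opt'
 * points at the option, 'optlen' is its total length including the kind
 * and length octets.  The record layout follows RFC 2018; the helper
 * name and buffer handling are illustrative.
 */
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

struct sack_edge { uint32_t le; uint32_t re; };

static int
parse_sack_option(const uint8_t *opt, size_t optlen,
    struct sack_edge *out, int max_out)
{
    size_t payload;
    int n, i;

    if (optlen < 2)
        return -1;
    payload = optlen - 2;                   /* skip kind + length octets */
    if ((payload % 8) != 0)
        return -1;
    n = (int)(payload / 8);                 /* 8 bytes per SACK block */
    if (n > max_out)
        n = max_out;

    for (i = 0; i < n; i++) {
        uint32_t le, re;
        memcpy(&le, opt + 2 + i * 8, 4);
        memcpy(&re, opt + 2 + i * 8 + 4, 4);
        out[i].le = ntohl(le);              /* left edge, host order */
        out[i].re = ntohl(re);              /* right edge, host order */
    }
    return n;
}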
Example No. 5
/*
 * USTAT PDU / SOS_READY Processor
 *
 * Arguments:
 *	sop	pointer to sscop connection block
 *	m	pointer to PDU buffer (without trailer)
 *	trlr	pointer to PDU trailer
 *
 * Returns:
 *	none
 *
 */
void
sscop_ustat_ready(struct sscop *sop, KBuffer *m, caddr_t trlr)
{
	struct ustat_pdu	*up = (struct ustat_pdu *)trlr;
	struct pdu_hdr	*php;
	sscop_seq	seq1, seq2;

	up->ustat_nmr = ntohl(up->ustat_nmr);
	up->ustat_nr = ntohl(up->ustat_nr);

	/*
	 * Validate peer's current receive data sequence number
	 */
	if (SEQ_GT(sop->so_ack, up->ustat_nr, sop->so_ack) ||
	    SEQ_GEQ(up->ustat_nr, sop->so_send, sop->so_ack)) {
		/*
		 * Bad data sequence number
		 */
		goto goterr;
	}

	/*
	 * Free acknowledged PDUs
	 */
	for (seq1 = sop->so_ack, SEQ_SET(seq2, up->ustat_nr);
			SEQ_LT(seq1, seq2, sop->so_ack);
			SEQ_INCR(seq1, 1)) {
		sscop_pack_free(sop, seq1);
	}

	/*
	 * Update transmit state variables
	 */
	sop->so_ack = seq2;
	SEQ_SET(sop->so_sendmax, up->ustat_nmr);

	/*
	 * Get USTAT list elements
	 */
	SEQ_SET(seq1, ntohl(up->ustat_le1));
	SEQ_SET(seq2, ntohl(up->ustat_le2));

	/*
	 * Validate elements
	 */
	if (SEQ_GT(sop->so_ack, seq1, sop->so_ack) ||
	    SEQ_GEQ(seq1, seq2, sop->so_ack) ||
	    SEQ_GEQ(seq2, sop->so_send, sop->so_ack)) {
		/*
		 * Bad element sequence number
		 */
		goto goterr;
	}

	/*
	 * Process each missing sequence number in this gap
	 */
	while (SEQ_LT(seq1, seq2, sop->so_ack)) {
		/*
		 * Find corresponding SD PDU on pending ack queue
		 */
		php = sscop_pack_locate(sop, seq1);
		if (php == NULL) {
			goto goterr;
		}

		/*
		 * Retransmit this SD PDU if it's not
		 * already scheduled for retransmission.
		 */
		if ((php->ph_rexmit_lk == NULL) &&
		    (sop->so_rexmit_tl != php)) {
			/*
			 * Put PDU on retransmit queue and schedule
			 * transmit servicing
			 */
			sscop_rexmit_insert(sop, php);
			sop->so_flags |= SOF_XMITSRVC;
		}

		/*
		 * Bump to next sequence number
		 */
		SEQ_INCR(seq1, 1);
	}

	/*
	 * Report retransmitted PDUs
	 */
	sscop_maa_error(sop, 'V');

	/*
	 * Free PDU buffer chain
	 */
	KB_FREEALL(m);

	/*
	 * See if transmit queues need servicing
	 */
	if (sop->so_flags & SOF_XMITSRVC)
		sscop_service_xmit(sop);

	return;

goterr:
	/*
	 * Protocol/parameter error encountered
	 */
	sscop_maa_error(sop, 'T');

	/*
	 * Free PDU buffer chain
	 */
	KB_FREEALL(m);

	if (sop->so_vers == SSCOP_VERS_QSAAL)
		/*
		 * Reestablish a new connection
		 */
		qsaal1_reestablish(sop);
	else
		/*
		 * Initiate error recovery
		 */
		q2110_error_recovery(sop);

	return;
}
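Unlike the two-argument TCP macros, the SSCOP SEQ_* comparisons above take a third argument: both operands are rebased against a window anchor (typically so_ack) before comparing, so the ordering stays correct across the sequence-space wrap. A sketch of that idea follows; the mask and macro bodies are assumptions for illustration, not the actual HARP definitions.

/*
 * Illustration of the base-relative, modular comparison behind the
 * three-argument SSCOP SEQ_* macros used above.  Rebasing both operands
 * against a window anchor ('base', e.g. so_ack) before comparing makes
 * the ordering wrap-safe.  The mask and macro shapes are illustrative,
 * not the actual HARP definitions.
 */
#include <stdint.h>

#define SSCOP_SEQ_MASK  0x00FFFFFFu         /* 24-bit space (assumed) */

#define SSCOP_REBASE(s, base)   (((s) - (base)) & SSCOP_SEQ_MASK)
#define SSCOP_SEQ_LT(a, b, base) \
	(SSCOP_REBASE((a), (base)) < SSCOP_REBASE((b), (base)))
#define SSCOP_SEQ_GT(a, b, base) \
	(SSCOP_REBASE((a), (base)) > SSCOP_REBASE((b), (base)))
#define SSCOP_SEQ_GEQ(a, b, base) \
	(SSCOP_REBASE((a), (base)) >= SSCOP_REBASE((b), (base)))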
Example No. 6
/*
 * STAT PDU / SOS_READY Processor
 *
 * Arguments:
 *	sop	pointer to sscop connection block
 *	m	pointer to PDU buffer (without trailer)
 *	trlr	pointer to PDU trailer
 *
 * Returns:
 *	none
 *
 */
void
sscop_stat_ready(struct sscop *sop, KBuffer *m, caddr_t trlr)
{
	struct stat_pdu	*sp = (struct stat_pdu *)trlr;
	struct pdu_hdr	*php;
	KBuffer		*m0 = m;
	sscop_seq	seq1, seq2, opa;
	int		cnt = 0;

	sp->stat_nps = ntohl(sp->stat_nps);
	sp->stat_nmr = ntohl(sp->stat_nmr);
	sp->stat_nr = ntohl(sp->stat_nr);

	/*
	 * Validate peer's received poll sequence number
	 */
	if (SEQ_GT(sop->so_pollack, sp->stat_nps, sop->so_pollack) ||
	    SEQ_GT(sp->stat_nps, sop->so_pollsend, sop->so_pollack)) {
		/*
		 * Bad poll sequence number
		 */
		sscop_maa_error(sop, 'R');
		goto goterr;
	}

	/*
	 * Validate peer's current receive data sequence number
	 */
	if (SEQ_GT(sop->so_ack, sp->stat_nr, sop->so_ack) ||
	    SEQ_GT(sp->stat_nr, sop->so_send, sop->so_ack)) {
		/*
		 * Bad data sequence number
		 */
		sscop_maa_error(sop, 'S');
		goto goterr;
	}

	/*
	 * Free acknowledged PDUs
	 */
	for (seq1 = sop->so_ack, SEQ_SET(seq2, sp->stat_nr);
			SEQ_LT(seq1, seq2, sop->so_ack);
			SEQ_INCR(seq1, 1)) {
		sscop_pack_free(sop, seq1);
	}

	/*
	 * Update transmit state variables
	 */
	opa = sop->so_pollack;
	sop->so_ack = seq2;
	SEQ_SET(sop->so_pollack, sp->stat_nps);
	SEQ_SET(sop->so_sendmax, sp->stat_nmr);

	/*
	 * Get first element in STAT list
	 */
	while (m && (KB_LEN(m) == 0))
		m = KB_NEXT(m);
	if (m == NULL)
		goto done;
	m = sscop_stat_getelem(m, &seq1);

	/*
	 * Make sure there's a second element too
	 */
	if (m == NULL)
		goto done;

	/*
	 * Validate first element (start of missing pdus)
	 */
	if (SEQ_GT(sop->so_ack, seq1, sop->so_ack) ||
	    SEQ_GEQ(seq1, sop->so_send, sop->so_ack)) {
		/*
		 * Bad element sequence number
		 */
		sscop_maa_error(sop, 'S');
		goto goterr;
	}

	/*
	 * Loop thru all STAT elements in list
	 */
	while (m) {
		/*
		 * Get next even element (start of received pdus)
		 */
		m = sscop_stat_getelem(m, &seq2);

		/*
		 * Validate sequence number
		 */
		if (SEQ_GEQ(seq1, seq2, sop->so_ack) ||
		    SEQ_GT(seq2, sop->so_send, sop->so_ack)) {
			/*
			 * Bad element sequence number
			 */
			sscop_maa_error(sop, 'S');
			goto goterr;
		}

		/*
		 * Process each missing sequence number in this gap
		 */
		while (SEQ_LT(seq1, seq2, sop->so_ack)) {
			/*
			 * Find corresponding SD PDU on pending ack queue
			 */
			php = sscop_pack_locate(sop, seq1);
			if (php == NULL) {
				sscop_maa_error(sop, 'S');
				goto goterr;
			}

			/*
			 * Retransmit this SD PDU only if it was last sent
			 * during an earlier poll sequence and it's not
			 * already scheduled for retransmission.
			 */
			if (SEQ_LT(php->ph_nps, sp->stat_nps, opa) &&
			    (php->ph_rexmit_lk == NULL) &&
			    (sop->so_rexmit_tl != php)) {
				/*
				 * Put PDU on retransmit queue and schedule
				 * transmit servicing
				 */
				sscop_rexmit_insert(sop, php);
				sop->so_flags |= SOF_XMITSRVC;
				cnt++;
			}

			/*
			 * Bump to next sequence number
			 */
			SEQ_INCR(seq1, 1);
		}

		/*
		 * Now process series of acknowledged PDUs
		 *
		 * Get next odd element (start of missing pdus),
		 * but make sure there is one and that it's valid
		 */
		if (m == NULL)
			goto done;
		m = sscop_stat_getelem(m, &seq2);
		if (SEQ_GEQ(seq1, seq2, sop->so_ack) ||
		    SEQ_GT(seq2, sop->so_send, sop->so_ack)) {
			/*
			 * Bad element sequence number
			 */
			sscop_maa_error(sop, 'S');
			goto goterr;
		}

		/*
		 * Process each acked sequence number
		 */
		while (SEQ_LT(seq1, seq2, sop->so_ack)) {
			/*
			 * Can we clear transmit buffers ??
			 */
			if ((sop->so_flags & SOF_NOCLRBUF) == 0) {
				/*
				 * Yes, free acked buffers
				 */
				sscop_pack_free(sop, seq1);
			}

			/*
			 * Bump to next sequence number
			 */
			SEQ_INCR(seq1, 1);
		}
	}

done:
	/*
	 * Free PDU buffer chain
	 */
	KB_FREEALL(m0);

	/*
	 * Report retransmitted PDUs
	 */
	if (cnt)
		sscop_maa_error(sop, 'V');

	/*
	 * Record transmit window closed transitions
	 */
	if (SEQ_LT(sop->so_send, sop->so_sendmax, sop->so_ack)) {
		if (sop->so_flags & SOF_NOCREDIT) {
			sop->so_flags &= ~SOF_NOCREDIT;
			sscop_maa_error(sop, 'X');
		}
	} else {
		if ((sop->so_flags & SOF_NOCREDIT) == 0) {
			sop->so_flags |= SOF_NOCREDIT;
			sscop_maa_error(sop, 'W');
		}
	}

	if (sop->so_vers == SSCOP_VERS_QSAAL)
		/*
		 * Restart lost poll/stat timer
		 */
		sop->so_timer[SSCOP_T_NORESP] = sop->so_parm.sp_timeresp;
	else {
		/*
		 * Determine new polling phase
		 */
		if ((sop->so_timer[SSCOP_T_POLL] != 0) &&
		    ((sop->so_flags & SOF_KEEPALIVE) == 0)) {
			/*
			 * Remain in active phase - reset NO-RESPONSE timer
			 */
			sop->so_timer[SSCOP_T_NORESP] =
					sop->so_parm.sp_timeresp;

		} else if (sop->so_timer[SSCOP_T_IDLE] == 0) {
			/*
			 * Go from transient to idle phase
			 */
			sop->so_timer[SSCOP_T_POLL] = 0;
			sop->so_flags &= ~SOF_KEEPALIVE;
			sop->so_timer[SSCOP_T_NORESP] = 0;
			sop->so_timer[SSCOP_T_IDLE] = sop->so_parm.sp_timeidle;
		}
	}

	/*
	 * See if transmit queues need servicing
	 */
	if (sop->so_flags & SOF_XMITSRVC)
		sscop_service_xmit(sop);

	return;

goterr:
	/*
	 * Protocol/parameter error encountered
	 */

	/*
	 * Free PDU buffer chain
	 */
	KB_FREEALL(m0);

	if (sop->so_vers == SSCOP_VERS_QSAAL)
		/*
		 * Reestablish a new connection
		 */
		qsaal1_reestablish(sop);
	else
		/*
		 * Initiate error recovery
		 */
		q2110_error_recovery(sop);

	return;
}
Example No. 7
/*
 * When a new ack with SACK is received, check if it indicates packet
 * reordering. If there is packet reordering, the socket is marked and
 * the late time offset by which the packet was reordered with
 * respect to its closest neighboring packets is computed.
 */
static void
tcp_sack_detect_reordering(struct tcpcb *tp, struct sackhole *s,
    tcp_seq sacked_seq, tcp_seq snd_fack)
{
	int32_t rext = 0, reordered = 0;

	/*
	 * If the SACK hole is past snd_fack, this is from new SACK
	 * information, so we can ignore it.
	 */
	if (SEQ_GT(s->end, snd_fack))
		return;
	/*
	 * If there has been a retransmit timeout, then the timestamp on 
	 * the SACK segment will be newer. This might lead to a
	 * false-positive. Avoid re-ordering detection in this case.
	 */
	if (tp->t_rxtshift > 0)
		return;

	/*
	 * Detect reordering from SACK information by checking
	 * if recently sacked data was never retransmitted from this hole.
	 */
	if (SEQ_LT(s->rxmit, sacked_seq)) {
		reordered = 1;
		tcpstat.tcps_avoid_rxmt++;
	}

	if (reordered) {
		if (tcp_detect_reordering == 1 &&
		    !(tp->t_flagsext & TF_PKTS_REORDERED)) {
			tp->t_flagsext |= TF_PKTS_REORDERED;
			tcpstat.tcps_detect_reordering++;
		}

		tcpstat.tcps_reordered_pkts++;
		tp->t_reordered_pkts++;

		/*
		 * If reordering is seen on a connection with ECN enabled,
		 * increment the heuristic
		 */
		if (TCP_ECN_ENABLED(tp)) {
			INP_INC_IFNET_STAT(tp->t_inpcb, ecn_fallback_reorder);
			tcpstat.tcps_ecn_fallback_reorder++;
			tcp_heuristic_ecn_aggressive(tp);
		}

		VERIFY(SEQ_GEQ(snd_fack, s->rxmit));

		if (s->rxmit_start > 0) {
			rext = timer_diff(tcp_now, 0, s->rxmit_start, 0);
			if (rext < 0)
				return;

			/*
			 * We take the maximum reorder window to schedule
			 * DELAYFR timer as that will take care of jitter
			 * on the network path.
			 *
			 * Computing average and standard deviation seems
			 * to cause unnecessary retransmissions when there
			 * is high jitter.
			 *
			 * We set a maximum of SRTT/2 and a minimum of
			 * 10 ms on the reorder window.
			 */
			tp->t_reorderwin = max(tp->t_reorderwin, rext);
			tp->t_reorderwin = min(tp->t_reorderwin,
			    (tp->t_srtt >> (TCP_RTT_SHIFT - 1)));
			tp->t_reorderwin = max(tp->t_reorderwin, 10);
		}
	}
}
Example No. 8
/*
 * This function is called upon receipt of new valid data (while not in header
 * prediction mode), and it updates the ordered list of sacks.
 */
void
tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
{
	/*
	 * First reported block MUST be the most recent one.  Subsequent
	 * blocks SHOULD be in the order in which they arrived at the
	 * receiver.  These two conditions make the implementation fully
	 * compliant with RFC 2018.
	 */
	struct sackblk head_blk, saved_blks[MAX_SACK_BLKS];
	int num_head, num_saved, i;

	/* SACK block for the received segment. */
	head_blk.start = rcv_start;
	head_blk.end = rcv_end;

	/*
	 * Merge updated SACK blocks into head_blk, and
	 * save unchanged SACK blocks into saved_blks[].
	 * num_saved will have the number of the saved SACK blocks.
	 */
	num_saved = 0;
	for (i = 0; i < tp->rcv_numsacks; i++) {
		tcp_seq start = tp->sackblks[i].start;
		tcp_seq end = tp->sackblks[i].end;
		if (SEQ_GEQ(start, end) || SEQ_LEQ(start, tp->rcv_nxt)) {
			/*
			 * Discard this SACK block.
			 */
		} else if (SEQ_LEQ(head_blk.start, end) &&
			   SEQ_GEQ(head_blk.end, start)) {
			/*
			 * Merge this SACK block into head_blk.
			 * This SACK block itself will be discarded.
			 */
			if (SEQ_GT(head_blk.start, start))
				head_blk.start = start;
			if (SEQ_LT(head_blk.end, end))
				head_blk.end = end;
		} else {
			/*
			 * Save this SACK block.
			 */
			saved_blks[num_saved].start = start;
			saved_blks[num_saved].end = end;
			num_saved++;
		}
	}

	/*
	 * Update SACK list in tp->sackblks[].
	 */
	num_head = 0;
	if (SEQ_GT(head_blk.start, tp->rcv_nxt)) {
		/*
		 * The received data segment is an out-of-order segment.
		 * Put head_blk at the top of SACK list.
		 */
		tp->sackblks[0] = head_blk;
		num_head = 1;
		/*
		 * If the number of saved SACK blocks exceeds its limit,
		 * discard the last SACK block.
		 */
		if (num_saved >= MAX_SACK_BLKS)
			num_saved--;
	}
	if (num_saved > 0) {
		/*
		 * Copy the saved SACK blocks back.
		 */
		bcopy(saved_blks, &tp->sackblks[num_head],
		      sizeof(struct sackblk) * num_saved);
	}

	/* Save the number of SACK blocks. */
	tp->rcv_numsacks = num_head + num_saved;

	/* If we are requesting SACK recovery, reset the stretch-ack state
	 * so that connection will generate more acks after recovery and
	 * sender's cwnd will open.
	 */
	if ((tp->t_flags & TF_STRETCHACK) != 0 && tp->rcv_numsacks > 0)
		tcp_reset_stretch_ack(tp);

#if TRAFFIC_MGT
	if (tp->acc_iaj > 0 && tp->rcv_numsacks > 0) 
		reset_acc_iaj(tp);
#endif /* TRAFFIC_MGT */
}
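A toy, self-contained illustration of the merge step above: any existing block that overlaps or adjoins the newly received range is folded into head_blk, the remaining blocks keep their previous order, and head_blk is reported first per RFC 2018. Plain integers, no wraparound handling.

/*
 * Toy illustration of the merge step above: any existing block that
 * overlaps or adjoins the newly received range is folded into head_blk,
 * the rest keep their previous order, and head_blk is reported first.
 * Plain integers, no wraparound handling.
 */
#include <stdint.h>
#include <stdio.h>

struct blk { uint32_t start, end; };

int main(void)
{
	struct blk list[4] = { { 2000, 2100 }, { 4000, 4100 } };
	struct blk head = { 2100, 2300 };	/* newly received segment */
	struct blk out[4];
	int n = 2, out_n = 0, i;

	for (i = 0; i < n; i++) {
		if (list[i].start <= head.end && list[i].end >= head.start) {
			/* Overlaps/adjoins the new block: merge into head. */
			if (list[i].start < head.start)
				head.start = list[i].start;
			if (list[i].end > head.end)
				head.end = list[i].end;
		} else {
			out[out_n++] = list[i];	/* keep unchanged, in order */
		}
	}

	/* Reported list: head first (2000-2300), then saved (4000-4100). */
	printf("head:  %u-%u\n", head.start, head.end);
	for (i = 0; i < out_n; i++)
		printf("saved: %u-%u\n", out[i].start, out[i].end);
	return 0;
}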
Example No. 9
/*
 * Tcp output routine: figure out what should be sent and send it.
 */
int
tcp_output(struct tcpcb *tp)
{
	struct socket *so = tp->t_inpcb->inp_socket;
	long len, recwin, sendwin;
	int off, flags, error;
#ifdef TCP_SIGNATURE
	int sigoff = 0;
#endif
	struct mbuf *m;
	struct ip *ip = NULL;
	struct ipovly *ipov = NULL;
	struct tcphdr *th;
	u_char opt[TCP_MAXOLEN];
	unsigned ipoptlen, optlen, hdrlen;
	int idle, sendalot;
	int i, sack_rxmit;
	int sack_bytes_rxmt;
	struct sackhole *p;
#if 0
	int maxburst = TCP_MAXBURST;
#endif
	struct rmxp_tao tao;
#ifdef INET6
	struct ip6_hdr *ip6 = NULL;
	int isipv6;

	bzero(&tao, sizeof(tao));
	isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0;
#endif
#ifdef TCP_ECN
	int needect;
#endif

	INP_LOCK_ASSERT(tp->t_inpcb);

	/*
	 * Determine length of data that should be transmitted,
	 * and flags that will be used.
	 * If there is some data or critical controls (SYN, RST)
	 * to send, then transmit; otherwise, investigate further.
	 */
	idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una);
	if (idle && (ticks - tp->t_rcvtime) >= tp->t_rxtcur) {
		/*
		 * We have been idle for "a while" and no acks are
		 * expected to clock out any data we send --
		 * slow start to get ack "clock" running again.
		 *
		 * Set the slow-start flight size depending on whether
		 * this is a local network or not.
		 */
		int ss = ss_fltsz;
#ifdef INET6
		if (isipv6) {
			if (in6_localaddr(&tp->t_inpcb->in6p_faddr))
				ss = ss_fltsz_local;
		} else
#endif
		if (in_localaddr(tp->t_inpcb->inp_faddr))
			ss = ss_fltsz_local;
		tp->snd_cwnd = tp->t_maxseg * ss;
	}
	tp->t_flags &= ~TF_LASTIDLE;
	if (idle) {
		if (tp->t_flags & TF_MORETOCOME) {
			tp->t_flags |= TF_LASTIDLE;
			idle = 0;
		}
	}
again:
	/*
	 * If we've recently taken a timeout, snd_max will be greater than
	 * snd_nxt.  There may be SACK information that allows us to avoid
	 * resending already delivered data.  Adjust snd_nxt accordingly.
	 */
	if (tp->sack_enable && SEQ_LT(tp->snd_nxt, tp->snd_max))
		tcp_sack_adjust(tp);
	sendalot = 0;
	off = tp->snd_nxt - tp->snd_una;
	sendwin = min(tp->snd_wnd, tp->snd_cwnd);
	sendwin = min(sendwin, tp->snd_bwnd);

	flags = tcp_outflags[tp->t_state];
	/*
	 * Send any SACK-generated retransmissions.  If we're explicitly trying
	 * to send out new data (when sendalot is 1), bypass this function.
	 * If we retransmit in fast recovery mode, decrement snd_cwnd, since
	 * we're replacing a (future) new transmission with a retransmission
	 * now, and we previously incremented snd_cwnd in tcp_input().
	 */
	/*
	 * Still in SACK recovery, reset rxmit flag to zero.
	 */
	sack_rxmit = 0;
	sack_bytes_rxmt = 0;
	len = 0;
	p = NULL;
	if (tp->sack_enable && IN_FASTRECOVERY(tp) &&
	    (p = tcp_sack_output(tp, &sack_bytes_rxmt))) {
		long cwin;
		
		cwin = min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt;
		if (cwin < 0)
			cwin = 0;
		/* Do not retransmit SACK segments beyond snd_recover */
		if (SEQ_GT(p->end, tp->snd_recover)) {
			/*
			 * (At least) part of sack hole extends beyond
			 * snd_recover. Check to see if we can rexmit data
			 * for this hole.
			 */
			if (SEQ_GEQ(p->rxmit, tp->snd_recover)) {
				/*
				 * Can't rexmit any more data for this hole.
				 * That data will be rexmitted in the next
				 * sack recovery episode, when snd_recover
				 * moves past p->rxmit.
				 */
				p = NULL;
				goto after_sack_rexmit;
			} else
				/* Can rexmit part of the current hole */
				len = ((long)ulmin(cwin,
						   tp->snd_recover - p->rxmit));
		} else
			len = ((long)ulmin(cwin, p->end - p->rxmit));
		off = p->rxmit - tp->snd_una;
		KASSERT(off >= 0,("%s: sack block to the left of una : %d",
		    __func__, off));
		if (len > 0) {
			sack_rxmit = 1;
			sendalot = 1;
			tcpstat.tcps_sack_rexmits++;
			tcpstat.tcps_sack_rexmit_bytes +=
			    min(len, tp->t_maxseg);
		}
	}
after_sack_rexmit:
	/*
	 * Get standard flags, and add SYN or FIN if requested by 'hidden'
	 * state flags.
	 */
	if (tp->t_flags & TF_NEEDFIN)
		flags |= TH_FIN;
	if (tp->t_flags & TF_NEEDSYN)
		flags |= TH_SYN;

	SOCKBUF_LOCK(&so->so_snd);
	/*
	 * If in persist timeout with window of 0, send 1 byte.
	 * Otherwise, if window is small but nonzero
	 * and timer expired, we will send what we can
	 * and go to transmit state.
	 */
	if (tp->t_force) {
		if (sendwin == 0) {
			/*
			 * If we still have some data to send, then
			 * clear the FIN bit.  Usually this would
			 * happen below when it realizes that we
			 * aren't sending all the data.  However,
			 * if we have exactly 1 byte of unsent data,
			 * then it won't clear the FIN bit below,
			 * and if we are in persist state, we wind
			 * up sending the packet without recording
			 * that we sent the FIN bit.
			 *
			 * We can't just blindly clear the FIN bit,
			 * because if we don't have any more data
			 * to send then the probe will be the FIN
			 * itself.
			 */
			if (off < so->so_snd.sb_cc)
				flags &= ~TH_FIN;
			sendwin = 1;
		} else {
			callout_stop(tp->tt_persist);
			tp->t_rxtshift = 0;
		}
	}

	/*
	 * If snd_nxt == snd_max and we have transmitted a FIN, the
	 * offset will be > 0 even if so_snd.sb_cc is 0, resulting in
	 * a negative length.  This can also occur when TCP opens up
	 * its congestion window while receiving additional duplicate
	 * acks after fast-retransmit because TCP will reset snd_nxt
	 * to snd_max after the fast-retransmit.
	 *
	 * In the normal retransmit-FIN-only case, however, snd_nxt will
	 * be set to snd_una, the offset will be 0, and the length may
	 * wind up 0.
	 *
	 * If sack_rxmit is true we are retransmitting from the scoreboard
	 * in which case len is already set.
	 */
	if (sack_rxmit == 0) {
		if (sack_bytes_rxmt == 0)
			len = ((long)ulmin(so->so_snd.sb_cc, sendwin) - off);
		else {
			long cwin;

                        /*
			 * We are inside of a SACK recovery episode and are
			 * sending new data, having retransmitted all the
			 * data possible in the scoreboard.
			 */
			len = ((long)ulmin(so->so_snd.sb_cc, tp->snd_wnd) 
			       - off);
			/*
			 * Don't remove this (len > 0) check !
			 * We explicitly check for len > 0 here (although it 
			 * isn't really necessary), to work around a gcc 
			 * optimization issue - to force gcc to compute
			 * len above. Without this check, the computation
			 * of len is bungled by the optimizer.
			 */
			if (len > 0) {
				cwin = tp->snd_cwnd - 
					(tp->snd_nxt - tp->sack_newdata) -
					sack_bytes_rxmt;
				if (cwin < 0)
					cwin = 0;
				len = lmin(len, cwin);
			}
		}
	}

	/*
	 * Lop off SYN bit if it has already been sent.  However, if this
	 * is SYN-SENT state and if segment contains data and if we don't
	 * know that foreign host supports TAO, suppress sending segment.
	 */
	if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
		flags &= ~TH_SYN;
		off--, len++;
		if (tcp_do_rfc1644)
			tcp_hc_gettao(&tp->t_inpcb->inp_inc, &tao);
		if (len > 0 && tp->t_state == TCPS_SYN_SENT &&
		     tao.tao_ccsent == 0)
			goto just_return;
	}

	/*
	 * Be careful not to send data and/or FIN on SYN segments
	 * in cases when no CC option will be sent.
	 * This measure is needed to prevent interoperability problems
	 * with not fully conformant TCP implementations.
	 */
	if ((flags & TH_SYN) &&
	    ((tp->t_flags & TF_NOOPT) || !(tp->t_flags & TF_REQ_CC) ||
	     ((flags & TH_ACK) && !(tp->t_flags & TF_RCVD_CC)))) {
		len = 0;
		flags &= ~TH_FIN;
	}

	if (len < 0) {
		/*
		 * If FIN has been sent but not acked,
		 * but we haven't been called to retransmit,
		 * len will be < 0.  Otherwise, window shrank
		 * after we sent into it.  If window shrank to 0,
		 * cancel pending retransmit, pull snd_nxt back
		 * to (closed) window, and set the persist timer
		 * if it isn't already going.  If the window didn't
		 * close completely, just wait for an ACK.
		 */
		len = 0;
		if (sendwin == 0) {
			callout_stop(tp->tt_rexmt);
			tp->t_rxtshift = 0;
			tp->snd_nxt = tp->snd_una;
			if (!callout_active(tp->tt_persist))
				tcp_setpersist(tp);
		}
	}

	/*
	 * len will be >= 0 after this point.  Truncate to the maximum
	 * segment length and ensure that FIN is removed if the length
	 * no longer contains the last data byte.
	 */
	if (len > tp->t_maxseg) {
		len = tp->t_maxseg;
		sendalot = 1;
	}
	if (sack_rxmit) {
		if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc))
			flags &= ~TH_FIN;
	} else {
		if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
			flags &= ~TH_FIN;
	}

	recwin = sbspace(&so->so_rcv);

	/*
	 * Sender silly window avoidance.   We transmit under the following
	 * conditions when len is non-zero:
	 *
	 *	- We have a full segment
	 *	- This is the last buffer in a write()/send() and we are
	 *	  either idle or running NODELAY
	 *	- we've timed out (e.g. persist timer)
	 *	- we have more than 1/2 the maximum send window's worth of
	 *	  data (the receiver may be limiting the window size)
	 *	- we need to retransmit
	 */
	if (len) {
		if (len == tp->t_maxseg)
			goto send;
		/*
		 * NOTE! on localhost connections an 'ack' from the remote
		 * end may occur synchronously with the output and cause
		 * us to flush a buffer queued with moretocome.  XXX
		 *
		 * note: the len + off check is almost certainly unnecessary.
		 */
		if (!(tp->t_flags & TF_MORETOCOME) &&	/* normal case */
		    (idle || (tp->t_flags & TF_NODELAY)) &&
		    len + off >= so->so_snd.sb_cc &&
		    (tp->t_flags & TF_NOPUSH) == 0) {
			goto send;
		}
		if (tp->t_force)			/* typ. timeout case */
			goto send;
		if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
			goto send;
		if (SEQ_LT(tp->snd_nxt, tp->snd_max))	/* retransmit case */
			goto send;
		if (sack_rxmit)
			goto send;
	}

	/*
	 * Compare available window to amount of window
	 * known to peer (as advertised window less
	 * next expected input).  If the difference is at least two
	 * max size segments, or at least 50% of the maximum possible
	 * window, then want to send a window update to peer.
	 * Skip this if the connection is in T/TCP half-open state.
	 */
	if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN)) {
		/*
		 * "adv" is the amount we can increase the window,
		 * taking into account that we are limited by
		 * TCP_MAXWIN << tp->rcv_scale.
		 */
		long adv = min(recwin, (long)TCP_MAXWIN << tp->rcv_scale) -
			(tp->rcv_adv - tp->rcv_nxt);

		if (adv >= (long) (2 * tp->t_maxseg))
			goto send;
		if (2 * adv >= (long) so->so_rcv.sb_hiwat)
			goto send;
	}

	/*
	 * Send if we owe the peer an ACK, RST, SYN, or urgent data.  ACKNOW
	 * is also a catch-all for the retransmit timer timeout case.
	 */
	if (tp->t_flags & TF_ACKNOW)
		goto send;
	if ((flags & TH_RST) ||
	    ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0))
		goto send;
	if (SEQ_GT(tp->snd_up, tp->snd_una))
		goto send;
	/*
	 * If our state indicates that FIN should be sent
	 * and we have not yet done so, then we need to send.
	 */
	if (flags & TH_FIN &&
	    ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
		goto send;
	/*
	 * In SACK, it is possible for tcp_output to fail to send a segment
	 * after the retransmission timer has been turned off.  Make sure
	 * that the retransmission timer is set.
	 */
	if (tp->sack_enable && SEQ_GT(tp->snd_max, tp->snd_una) &&
	    !callout_active(tp->tt_rexmt) &&
	    !callout_active(tp->tt_persist)) {
		callout_reset(tp->tt_rexmt, tp->t_rxtcur,
			      tcp_timer_rexmt, tp);
		goto just_return;
	} 
	/*
	 * TCP window updates are not reliable, rather a polling protocol
	 * using ``persist'' packets is used to ensure receipt of window
	 * updates.  The three ``states'' for the output side are:
	 *	idle			not doing retransmits or persists
	 *	persisting		to move a small or zero window
	 *	(re)transmitting	and thereby not persisting
	 *
	 * callout_active(tp->tt_persist)
	 *	is true when we are in persist state.
	 * tp->t_force
	 *	is set when we are called to send a persist packet.
	 * callout_active(tp->tt_rexmt)
	 *	is set when we are retransmitting
	 * The output side is idle when both timers are zero.
	 *
	 * If send window is too small, there is data to transmit, and no
	 * retransmit or persist is pending, then go to persist state.
	 * If nothing happens soon, send when timer expires:
	 * if window is nonzero, transmit what we can,
	 * otherwise force out a byte.
	 */
	if (so->so_snd.sb_cc && !callout_active(tp->tt_rexmt) &&
	    !callout_active(tp->tt_persist)) {
		tp->t_rxtshift = 0;
		tcp_setpersist(tp);
	}

	/*
	 * No reason to send a segment, just return.
	 */
just_return:
	SOCKBUF_UNLOCK(&so->so_snd);
	return (0);

send:
	SOCKBUF_LOCK_ASSERT(&so->so_snd);
	/*
	 * Before ESTABLISHED, force sending of initial options
	 * unless TCP set not to do any options.
	 * NOTE: we assume that the IP/TCP header plus TCP options
	 * always fit in a single mbuf, leaving room for a maximum
	 * link header, i.e.
	 *	max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MCLBYTES
	 */
	optlen = 0;
#ifdef INET6
	if (isipv6)
		hdrlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
	else
#endif
	hdrlen = sizeof (struct tcpiphdr);
	if (flags & TH_SYN) {
		tp->snd_nxt = tp->iss;
		if ((tp->t_flags & TF_NOOPT) == 0) {
			u_short mss;

			opt[0] = TCPOPT_MAXSEG;
			opt[1] = TCPOLEN_MAXSEG;
			mss = htons((u_short) tcp_mssopt(&tp->t_inpcb->inp_inc));
			(void)memcpy(opt + 2, &mss, sizeof(mss));
			optlen = TCPOLEN_MAXSEG;

			/*
			 * If this is the first SYN of connection (not a SYN
			 * ACK), include SACK_PERMIT_HDR option.  If this is a
			 * SYN ACK, include SACK_PERMIT_HDR option if peer has
			 * already done so. This is only for active connect,
			 * since the syncache takes care of the passive connect.
			 */
			if (tp->sack_enable && ((flags & TH_ACK) == 0 ||
			    (tp->t_flags & TF_SACK_PERMIT))) {
				*((u_int32_t *) (opt + optlen)) =
					htonl(TCPOPT_SACK_PERMIT_HDR);
				optlen += 4;
			}
			if ((tp->t_flags & TF_REQ_SCALE) &&
			    ((flags & TH_ACK) == 0 ||
			    (tp->t_flags & TF_RCVD_SCALE))) {
				*((u_int32_t *)(opt + optlen)) = htonl(
					TCPOPT_NOP << 24 |
					TCPOPT_WINDOW << 16 |
					TCPOLEN_WINDOW << 8 |
					tp->request_r_scale);
				optlen += 4;
			}
		}
	}

	/*
	 * Send a timestamp and echo-reply if this is a SYN and our side
	 * wants to use timestamps (TF_REQ_TSTMP is set) or both our side
	 * and our peer have sent timestamps in our SYN's.
	 */
	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
	    (flags & TH_RST) == 0 &&
	    ((flags & TH_ACK) == 0 ||
	     (tp->t_flags & TF_RCVD_TSTMP))) {
		u_int32_t *lp = (u_int32_t *)(opt + optlen);

		/* Form timestamp option as shown in appendix A of RFC 1323. */
		*lp++ = htonl(TCPOPT_TSTAMP_HDR);
		*lp++ = htonl(ticks);
		*lp   = htonl(tp->ts_recent);
		optlen += TCPOLEN_TSTAMP_APPA;
	}

	/*
	 * Send SACKs if necessary.  This should be the last option processed.
	 * Only as many SACKs are sent as are permitted by the maximum options
	 * size.  No more than three SACKs are sent.
	 */
	if (tp->sack_enable && tp->t_state == TCPS_ESTABLISHED &&
	    (tp->t_flags & (TF_SACK_PERMIT|TF_NOOPT)) == TF_SACK_PERMIT &&
	    tp->rcv_numsacks) {
		u_int32_t *lp = (u_int32_t *)(opt + optlen);
		u_int32_t *olp = lp++;
		int count = 0;  /* actual number of SACKs inserted */
		int maxsack = (MAX_TCPOPTLEN - (optlen + 4))/TCPOLEN_SACK;

		tcpstat.tcps_sack_send_blocks++;
		maxsack = min(maxsack, TCP_MAX_SACK);
		for (i = 0; (i < tp->rcv_numsacks && count < maxsack); i++) {
			struct sackblk sack = tp->sackblks[i];
			if (sack.start == 0 && sack.end == 0)
				continue;
			*lp++ = htonl(sack.start);
			*lp++ = htonl(sack.end);
			count++;
		}
		*olp = htonl(TCPOPT_SACK_HDR|(TCPOLEN_SACK*count+2));
		optlen += TCPOLEN_SACK*count + 4; /* including leading NOPs */
	}
	/*
	 * Send `CC-family' options if our side wants to use them (TF_REQ_CC),
	 * options are allowed (!TF_NOOPT) and it's not a RST.
	 */
	if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
	     (flags & TH_RST) == 0) {
		switch (flags & (TH_SYN|TH_ACK)) {
		/*
		 * This is a normal ACK, send CC if we received CC before
		 * from our peer.
		 */
		case TH_ACK:
			if (!(tp->t_flags & TF_RCVD_CC))
				break;
			/*FALLTHROUGH*/

		/*
		 * We can only get here in T/TCP's SYN_SENT* state, when
		 * we're a sending a non-SYN segment without waiting for
		 * the ACK of our SYN.  A check above assures that we only
		 * do this if our peer understands T/TCP.
		 */
		case 0:
			opt[optlen++] = TCPOPT_NOP;
			opt[optlen++] = TCPOPT_NOP;
			opt[optlen++] = TCPOPT_CC;
			opt[optlen++] = TCPOLEN_CC;
			*(u_int32_t *)&opt[optlen] = htonl(tp->cc_send);

			optlen += 4;
			break;

		/*
		 * This is our initial SYN, check whether we have to use
		 * CC or CC.new.
		 */
		case TH_SYN:
			opt[optlen++] = TCPOPT_NOP;
			opt[optlen++] = TCPOPT_NOP;
			opt[optlen++] = tp->t_flags & TF_SENDCCNEW ?
						TCPOPT_CCNEW : TCPOPT_CC;
			opt[optlen++] = TCPOLEN_CC;
			*(u_int32_t *)&opt[optlen] = htonl(tp->cc_send);
			optlen += 4;
			break;

		/*
		 * This is a SYN,ACK; send CC and CC.echo if we received
		 * CC from our peer.
		 */
		case (TH_SYN|TH_ACK):
			if (tp->t_flags & TF_RCVD_CC) {
				opt[optlen++] = TCPOPT_NOP;
				opt[optlen++] = TCPOPT_NOP;
				opt[optlen++] = TCPOPT_CC;
				opt[optlen++] = TCPOLEN_CC;
				*(u_int32_t *)&opt[optlen] =
					htonl(tp->cc_send);
				optlen += 4;
				opt[optlen++] = TCPOPT_NOP;
				opt[optlen++] = TCPOPT_NOP;
				opt[optlen++] = TCPOPT_CCECHO;
				opt[optlen++] = TCPOLEN_CC;
				*(u_int32_t *)&opt[optlen] =
					htonl(tp->cc_recv);
				optlen += 4;
			}
			break;
		}
	}

#ifdef TCP_SIGNATURE
#ifdef INET6
	if (!isipv6)
#endif
	if (tp->t_flags & TF_SIGNATURE) {
		int i;
		u_char *bp;

		/* Initialize TCP-MD5 option (RFC2385) */
		bp = (u_char *)opt + optlen;
		*bp++ = TCPOPT_SIGNATURE;
		*bp++ = TCPOLEN_SIGNATURE;
		sigoff = optlen + 2;
		for (i = 0; i < TCP_SIGLEN; i++)
			*bp++ = 0;
		optlen += TCPOLEN_SIGNATURE;

		/* Terminate options list and maintain 32-bit alignment. */
		*bp++ = TCPOPT_NOP;
		*bp++ = TCPOPT_EOL;
		optlen += 2;
	}
#endif /* TCP_SIGNATURE */

	hdrlen += optlen;

#ifdef INET6
	if (isipv6)
		ipoptlen = ip6_optlen(tp->t_inpcb);
	else
#endif
	if (tp->t_inpcb->inp_options)
		ipoptlen = tp->t_inpcb->inp_options->m_len -
				offsetof(struct ipoption, ipopt_list);
	else
Example No. 10
static int tcpup_state_receive(struct tcpup_info *upp, struct tcpiphdr *tcp, size_t dlen)
{
	int xflags = 0;
	int snd_una = htonl(tcp->th_ack);

	if (tcp->th_flags & TH_RST) {
		upp->t_state = TCPS_CLOSED;
		return 0;
	}

	if ((tcp->th_flags & TH_ACK) && SEQ_GT(snd_una, upp->snd_una)) {
		/* update snd una from peer */
		upp->snd_una = snd_una;
	}

	switch (upp->t_state) {
		case TCPS_SYN_SENT:
			xflags = TH_SYN| TH_ACK;
			if ((tcp->th_flags & xflags) == TH_SYN) {
				assert((tcp->th_flags & TH_FIN) != TH_FIN);
				tcp_state_preload(upp, TCPS_SYN_RECEIVED, htonl(tcp->th_seq) + 1);
				return 0;
			}

			if ((tcp->th_flags & xflags) == xflags
					&& SEQ_GEQ(htonl(tcp->th_ack), upp->snd_nxt)) {
				assert((tcp->th_flags & TH_FIN) != TH_FIN);
				tcp_state_preload(upp, TCPS_ESTABLISHED, htonl(tcp->th_seq));
				return 0;
			}
			break;

		case TCPS_SYN_RECEIVED:
			if ((tcp->th_flags & TH_ACK) == TH_ACK
					&& SEQ_GEQ(htonl(tcp->th_ack), upp->snd_nxt)) {
				assert((tcp->th_flags & TH_FIN) != TH_FIN);
				tcp_state_preload(upp, TCPS_ESTABLISHED, htonl(tcp->th_seq));
				return 0;
			}
			break;

		case TCPS_ESTABLISHED:
			if ((tcp->th_flags & TH_FIN) == TH_FIN) {
				tcp_state_preload(upp, TCPS_CLOSE_WAIT, htonl(tcp->th_seq) + 1);
				return 0;
			}
			break;

		case TCPS_FIN_WAIT_1:
			xflags = TH_FIN| TH_ACK;
			if ((tcp->th_flags & xflags) == xflags
					&& SEQ_GEQ(htonl(tcp->th_ack), upp->snd_nxt)) {
				tcp_state_preload(upp, TCPS_TIME_WAIT, htonl(tcp->th_seq) + dlen + 1);
				return 0;
			}

			if ((tcp->th_flags & TH_FIN) == TH_FIN) {
				tcp_state_preload(upp, TCPS_CLOSING, htonl(tcp->th_seq) + dlen + 1);
				return 0;
			}

			if ((tcp->th_flags & TH_ACK) == TH_ACK
					&& SEQ_GEQ(htonl(tcp->th_ack), upp->snd_nxt)) {
				tcp_state_preload(upp, TCPS_FIN_WAIT_2, htonl(tcp->th_seq) + dlen);
				return 0;
			}
			break;

		case TCPS_FIN_WAIT_2:
			if ((tcp->th_flags & TH_FIN) == TH_FIN) {
				tcp_state_preload(upp, TCPS_TIME_WAIT, htonl(tcp->th_seq) + dlen + 1);
				return 0;
			}
			break;

		case TCPS_CLOSING:
			if ((tcp->th_flags & TH_ACK) == TH_ACK
					&& SEQ_GEQ(htonl(tcp->th_ack), upp->snd_nxt)) {
				tcp_state_preload(upp, TCPS_TIME_WAIT, htonl(tcp->th_seq) + dlen);
				return 0;
			}
			break;

		case TCPS_LAST_ACK:
			if ((tcp->th_flags & TH_ACK) == TH_ACK
					&& SEQ_GEQ(htonl(tcp->th_ack), upp->snd_nxt)) {
				tcp_state_preload(upp, TCPS_CLOSED, htonl(tcp->th_seq) + dlen);
				return 0;
			}
			break;

		case TCPS_TIME_WAIT:
			fprintf(stderr, "before TIME_WAIT -> TIME_WAIT\n");
			break;
	}

	return 0;
}
Example No. 11
/*
 * To insert a new blk to the array of SACK blk in receiver.
 *
 * Parameters:
 *	sack_blk_t *head: pointer to the array of SACK blks.
 *	tcp_seq begin: starting seq num of the new blk.
 *	tcp_seq end: ending seq num of the new blk.
 *	int32_t *num: (referenced) total num of SACK blks on the list.
 */
void
tcp_sack_insert(sack_blk_t *head, tcp_seq begin, tcp_seq end, int32_t *num)
{
	int32_t	i, j, old_num, new_num;
	sack_blk_t tmp[MAX_SACK_BLK - 1];

	/* The array is empty, just add the new one. */
	if (*num == 0) {
		head[0].begin = begin;
		head[0].end = end;
		*num = 1;
		return;
	}

	/*
	 * Check for overlap.  There are five cases.
	 *
	 * 1. there is no overlap with any other SACK blks.
	 * 2. new SACK blk is completely contained in another blk.
	 * 3. tail part of new SACK blk overlaps with another blk.
	 * 4. head part of new SACK blk overlaps with another blk.
	 * 5. new SACK blk completely contains another blk.
	 *
	 * Use tmp to hold old SACK blks.  After the loop, copy them back
	 * to head.
	 */
	old_num = *num;
	if (old_num > MAX_SACK_BLK - 1) {
		old_num = MAX_SACK_BLK - 1;
	}
	new_num = old_num;
	j = 0;
	for (i = 0; i < old_num; i++) {
		if (SEQ_LT(end, head[i].begin) || SEQ_GT(begin, head[i].end)) {
			/* Case 1: continue to check. */
			tmp[j].begin = head[i].begin;
			tmp[j].end = head[i].end;
			j++;
			continue;
		} else if (SEQ_GEQ(begin, head[i].begin) &&
		    SEQ_LEQ(end, head[i].end)) {
			/* Case 2: re-insert the old blk to the head. */
			begin = head[i].begin;
			end = head[i].end;
		} else if (SEQ_LEQ(end, head[i].end) &&
		    SEQ_GEQ(end, head[i].begin)) {
			/*
			 * Case 3: Extend the new blk, remove the old one
			 * and continue to check.
			 */
			end = head[i].end;
		} else if (SEQ_GEQ(begin, head[i].begin) &&
		    SEQ_LEQ(begin, head[i].end)) {
			/* Case 4 */
			begin = head[i].begin;
		}
		/*
		 * Common code for all cases except the first one, which
		 * copies the original SACK blk into the tmp storage.  Other
		 * cases remove the original SACK blk by not copying into
		 * tmp storage.
		 */
		new_num--;
	}

	head[0].begin = begin;
	head[0].end = end;
	for (i = 0; i < new_num; i++) {
		head[i+1].begin = tmp[i].begin;
		head[i+1].end = tmp[i].end;
	}
	*num = new_num + 1;
}
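Below is a hypothetical standalone driver for this routine and tcp_sack_remove() from Example No. 3, assuming both function bodies are compiled in as shown. The type, constant, and macro definitions are minimal stand-ins for the stack's own headers, provided only to make the example self-contained.

/*
 * Hypothetical driver for tcp_sack_insert()/tcp_sack_remove().  The
 * definitions below are minimal stand-ins for the stack's headers; link
 * against the two routines shown in Example No. 11 and Example No. 3.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint32_t tcp_seq;
typedef struct { tcp_seq begin; tcp_seq end; } sack_blk_t;

#define MAX_SACK_BLK	4			/* stand-in value */
#define SEQ_LT(a, b)	((int32_t)((a) - (b)) < 0)
#define SEQ_LEQ(a, b)	((int32_t)((a) - (b)) <= 0)
#define SEQ_GT(a, b)	((int32_t)((a) - (b)) > 0)
#define SEQ_GEQ(a, b)	((int32_t)((a) - (b)) >= 0)

void tcp_sack_insert(sack_blk_t *, tcp_seq, tcp_seq, int32_t *);
void tcp_sack_remove(sack_blk_t *, tcp_seq, int32_t *);

int main(void)
{
	sack_blk_t blks[MAX_SACK_BLK];
	int32_t num = 0, i;

	tcp_sack_insert(blks, 2000, 2100, &num);  /* first out-of-order block */
	tcp_sack_insert(blks, 3000, 3100, &num);  /* newer block moves to head */
	tcp_sack_remove(blks, 2100, &num);        /* rcv_nxt advanced to 2100 */

	/* Expect a single remaining block: 3000-3100. */
	for (i = 0; i < num; i++)
		printf("blk %d: %u-%u\n", (int)i, blks[i].begin, blks[i].end);
	return 0;
}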
Example No. 12
/*
 * SD PDU / SOS_READY Processor
 *
 * Arguments:
 *	sop	pointer to sscop connection block
 *	m	pointer to PDU buffer (without trailer)
 *	trlr	pointer to PDU trailer
 *
 * Returns:
 *	none
 *
 */
static void
sscop_sd_ready(struct sscop *sop, KBuffer *m, caddr_t trlr)
{
	struct sd_pdu	*sp = (struct sd_pdu *)trlr;
	struct pdu_hdr	*php;
	KBuffer		*n;
	sscop_seq	ns;
	int		err, space;

	/*
	 * Get PDU sequence number
	 */
	SEQ_SET(ns, ntohl(sp->sd_ns));

	/*
	 * Ensure that the sequence number fits within the window
	 */
	if (SEQ_GEQ(ns, sop->so_rcvmax, sop->so_rcvnext)) {
		/*
		 * It doesn't, drop received data
		 */
		KB_FREEALL(m);

		/*
		 * If next highest PDU hasn't reached window end yet,
		 * then send a USTAT to inform transmitter of this gap
		 */
		if (SEQ_LT(sop->so_rcvhigh, sop->so_rcvmax, sop->so_rcvnext)) { 
			sscop_send_ustat(sop, sop->so_rcvmax);
			sop->so_rcvhigh = sop->so_rcvmax;
		}
		return;
	}

	/*
	 * If this is the next in-sequence PDU, hand it to user
	 */
	if (ns == sop->so_rcvnext) {
		STACK_CALL(SSCOP_DATA_IND, sop->so_upper, sop->so_toku,
			sop->so_connvc, (int)m, ns, err);
		if (err) {
			KB_FREEALL(m);
			return;
		}

		/*
		 * Bump next expected sequence number
		 */
		SEQ_INCR(sop->so_rcvnext, 1);

		/*
		 * Slide receive window down
		 */
		SEQ_INCR(sop->so_rcvmax, 1);

		/*
		 * Is this the highest sequence PDU we've received??
		 */
		if (ns == sop->so_rcvhigh) {
			/*
			 * Yes, bump the limit and exit
			 */
			sop->so_rcvhigh = sop->so_rcvnext;
			return;
		}

		/*
		 * This is a retransmitted PDU, so see if we have
		 * more in-sequence PDUs already queued up
		 */
		while ((php = sop->so_recv_hd) &&
		       (php->ph_ns == sop->so_rcvnext)) {

			/*
			 * Yup we do, so remove next PDU from queue and
			 * pass it up to the user as well
			 */
			sop->so_recv_hd = php->ph_recv_lk;
			if (sop->so_recv_hd == NULL)
				sop->so_recv_tl = NULL;
			STACK_CALL(SSCOP_DATA_IND, sop->so_upper, sop->so_toku,
				sop->so_connvc, (int)php->ph_buf, php->ph_ns,
				err);
			if (err) {
				/*
				 * Should never happen, but...
				 */
				KB_FREEALL(php->ph_buf);
				sscop_abort(sop, "stack memory\n");
				return;
			}

			/*
			 * Bump next expected sequence number
			 */
			SEQ_INCR(sop->so_rcvnext, 1);

			/*
			 * Slide receive window down
			 */
			SEQ_INCR(sop->so_rcvmax, 1);
		}

		/*
		 * Finished with data delivery...
		 */
		return;
	}

	/*
	 * We're gonna have to queue this PDU, so find space
	 * for the PDU header
	 */
	KB_HEADROOM(m, space);

	/*
	 * If there's not enough room in the received buffer,
	 * allocate & link a new buffer for the header
	 */
	if (space < sizeof(struct pdu_hdr)) {

		KB_ALLOC(n, sizeof(struct pdu_hdr), KB_F_NOWAIT, KB_T_HEADER);
		if (n == NULL) {
			KB_FREEALL(m);
			return;
		}
		KB_HEADSET(n, sizeof(struct pdu_hdr));
		KB_LEN(n) = 0;
		KB_LINKHEAD(n, m);
		m = n;
	}

	/*
	 * Build PDU header
	 *
	 * We can at least assume/require that the start of
	 * the user data is aligned.  Also note that we don't
	 * include this header in the buffer len/offset fields.
	 */
	KB_DATASTART(m, php, struct pdu_hdr *);
	php--;
	php->ph_ns = ns;
	php->ph_buf = m;

	/*
	 * Insert PDU into the receive queue
	 */
	if (sscop_recv_insert(sop, php)) {
		/*
		 * Oops, a duplicate sequence number PDU is already on
		 * the queue, something's wrong here.
		 */
		sscop_maa_error(sop, 'Q');

		/*
		 * Free buffers
		 */
		KB_FREEALL(m);

		/*
		 * Go into recovery mode
		 */
		q2110_error_recovery(sop);

		return;
	}

	/*
	 * Are we at the high-water mark??
	 */
	if (ns == sop->so_rcvhigh) {
		/*
		 * Yes, just bump the mark
		 */
		SEQ_INCR(sop->so_rcvhigh, 1);

		return;
	}

	/*
	 * Are we beyond the high-water mark??
	 */
	if (SEQ_GT(ns, sop->so_rcvhigh, sop->so_rcvnext)) {
		/*
		 * Yes, then there's a missing PDU, so inform the transmitter
		 */
		sscop_send_ustat(sop, ns);

		/*
		 * Update high-water mark
		 */
		sop->so_rcvhigh = SEQ_ADD(ns, 1);
	}

	return;
}
Example No. 13
/**
 *  \brief insert a SACK range
 *
 *  \param le left edge in host order
 *  \param re right edge in host order
 *
 *  \retval 0 all is good
 *  \retval -1 error
 */
static int StreamTcpSackInsertRange(TcpStream *stream, uint32_t le, uint32_t re) {
    SCLogDebug("le %u, re %u", le, re);
#ifdef DEBUG
    StreamTcpSackPrintList(stream);
#endif

    /* if to the left of last_ack then ignore */
    if (SEQ_LT(re, stream->last_ack)) {
        SCLogDebug("too far left. discarding");
        goto end;
    }
    /* if to the right of the tcp window then ignore */
    if (SEQ_GT(le, (stream->last_ack + stream->window))) {
        SCLogDebug("too far right. discarding");
        goto end;
    }
    if (stream->sack_head != NULL) {
        StreamTcpSackRecord *rec;

        for (rec = stream->sack_head; rec != NULL; rec = rec->next) {
            SCLogDebug("rec %p, le %u, re %u", rec, rec->le, rec->re);

            if (SEQ_LT(le, rec->le)) {
                SCLogDebug("SEQ_LT(le, rec->le)");
                if (SEQ_LT(re, rec->le)) {
                    SCLogDebug("SEQ_LT(re, rec->le)");
                    // entirely before, prepend
                    StreamTcpSackRecord *stsr = StreamTcpSackRecordAlloc();
                    if (unlikely(stsr == NULL)) {
                        SCReturnInt(-1);
                    }
                    stsr->le = le;
                    stsr->re = re;

                    stsr->next = stream->sack_head;
                    stream->sack_head = stsr;
                    goto end;
                } else if (SEQ_EQ(re, rec->le)) {
                    SCLogDebug("SEQ_EQ(re, rec->le)");
                    // starts before, ends on rec->le, expand
                    rec->le = le;
                } else if (SEQ_GT(re, rec->le)) {
                    SCLogDebug("SEQ_GT(re, rec->le)");
                    // starts before, ends beyond rec->le
                    if (SEQ_LEQ(re, rec->re)) {
                        SCLogDebug("SEQ_LEQ(re, rec->re)");
                        // ends before rec->re, expand
                        rec->le = le;
                    } else { // implied if (re > rec->re)
                        SCLogDebug("implied if (re > rec->re), le set to %u", rec->re);
                        le = rec->re;
                        continue;
                    }
                }
            } else if (SEQ_EQ(le, rec->le)) {
                SCLogDebug("SEQ_EQ(le, rec->le)");
                if (SEQ_LEQ(re, rec->re)) {
                    SCLogDebug("SEQ_LEQ(re, rec->re)");
                    // new record fully overlapped
                    SCReturnInt(0);
                } else { // implied re > rec->re
                    SCLogDebug("implied re > rec->re");
                    if (rec->next != NULL) {
                        if (SEQ_LEQ(re, rec->next->le)) {
                            rec->re = re;
                            goto end;
                        } else {
                            rec->re = rec->next->le;
                            le = rec->next->le;
                            SCLogDebug("le is now %u", le);
                            continue;
                        }
                    } else {
                        rec->re = re;
                        goto end;
                    }
                }
            } else { // implied (le > rec->le)
                SCLogDebug("implied (le > rec->le)");
                if (SEQ_LT(le, rec->re)) {
                    SCLogDebug("SEQ_LT(le, rec->re))");
                    // new record fully overlapped
                    if (SEQ_GT(re, rec->re)) {
                        SCLogDebug("SEQ_GT(re, rec->re)");

                        if (rec->next != NULL) {
                            if (SEQ_LEQ(re, rec->next->le)) {
                                rec->re = re;
                                goto end;
                            } else {
                                rec->re = rec->next->le;
                                le = rec->next->le;
                                continue;
                            }
                        } else {
                            rec->re = re;
                            goto end;
                        }
                    }

                    SCLogDebug("new range fully overlapped");
                    SCReturnInt(0);
                } else if (SEQ_EQ(le, rec->re)) {
                    SCLogDebug("here");
                    // new record fully overlapped
                    //int r = StreamTcpSackInsertRange(stream, rec->re+1, re);
                    //SCReturnInt(r);
                    le = rec->re;
                    continue;
                } else { /* implied le > rec->re */
                    SCLogDebug("implied le > rec->re");
                    if (rec->next == NULL) {
                        SCLogDebug("rec->next == NULL");
                        StreamTcpSackRecord *stsr = StreamTcpSackRecordAlloc();
                        if (unlikely(stsr == NULL)) {
                            SCReturnInt(-1);
                        }
                        stsr->le = le;
                        stsr->re = re;
                        stsr->next = NULL;

                        stream->sack_tail->next = stsr;
                        stream->sack_tail = stsr;
                        goto end;
                    } else {
                        SCLogDebug("implied rec->next != NULL");
                        if (SEQ_LT(le, rec->next->le) && SEQ_LT(re, rec->next->le)) {
                            SCLogDebug("SEQ_LT(le, rec->next->le) && SEQ_LT(re, rec->next->le)");
                            StreamTcpSackRecord *stsr = StreamTcpSackRecordAlloc();
                            if (unlikely(stsr == NULL)) {
                                SCReturnInt(-1);
                            }
                            stsr->le = le;
                            stsr->re = re;
                            stsr->next = rec->next;
                            rec->next = stsr;

                        } else if (SEQ_LT(le, rec->next->le) && SEQ_GEQ(re, rec->next->le)) {
                            SCLogDebug("SEQ_LT(le, rec->next->le) && SEQ_GEQ(re, rec->next->le)");
                            StreamTcpSackRecord *stsr = StreamTcpSackRecordAlloc();
                            if (unlikely(stsr == NULL)) {
                                SCReturnInt(-1);
                            }
                            stsr->le = le;
                            stsr->re = rec->next->le;
                            stsr->next = rec->next;
                            rec->next = stsr;

                            le = rec->next->le;
                        }
                    }
                }
            }
        }
    } else {
        SCLogDebug("implied empty list");
        StreamTcpSackRecord *stsr = StreamTcpSackRecordAlloc();
        if (unlikely(stsr == NULL)) {
            SCReturnInt(-1);
        }
        stsr->le = le;
        stsr->re = re;
        stsr->next = NULL;

        stream->sack_head = stsr;
        stream->sack_tail = stsr;
    }

    StreamTcpSackPruneList(stream);
end:
    SCReturnInt(0);
}
Example No. 14
static INLINE void ApplyFlowDepth(HTTPINSPECT_CONF *ServerConf, Packet *p, 
        HttpSessionData *sd, int resp_header_size, int expected, uint32_t seq_num)
{
    if(!ServerConf->server_flow_depth)
    {
        SetDetectLimit(p, p->dsize);
    }
    else if(ServerConf->server_flow_depth == -1)
    {
        SetDetectLimit(p, resp_header_size);
    }
    else
    {
        if(sd != NULL)
        {
            if(sd->resp_state.is_max_seq )
            {
                if(SEQ_GEQ((sd->resp_state.max_seq), seq_num))
                {
                    if(p->dsize > (sd->resp_state.max_seq - seq_num))
                    {
                        SetDetectLimit(p, (sd->resp_state.max_seq - seq_num));
                        return;
                    }
                    else
                    {
                        SetDetectLimit(p, p->dsize);
                        return;
                    }
                }
                else
                {
                    SetDetectLimit(p, resp_header_size);
                    return;
                }
            }
            else
            {
                if(expected)
                {
                    if(p->dsize > ServerConf->server_flow_depth)
                    {
                        SetDetectLimit(p, ServerConf->server_flow_depth);
                        return;
                    }
                    else
                    {
                        SetDetectLimit(p, p->dsize);
                        return;
                    }
                }
                else
                {
                    SetDetectLimit(p, 0);
                    return;
                }
            }

        }
        else
        {
            SetDetectLimit(p, p->dsize);
        }
    }
}
Example No. 15
void
tcp_timer_rexmt(void *arg)
{
	struct tcpcb *tp = arg;
	uint32_t rto;
#ifdef TCP_DEBUG
	struct socket *so = NULL;
	short ostate;
#endif

	mutex_enter(softnet_lock);
	if ((tp->t_flags & TF_DEAD) != 0) {
		mutex_exit(softnet_lock);
		return;
	}
	if (!callout_expired(&tp->t_timer[TCPT_REXMT])) {
		mutex_exit(softnet_lock);
		return;
	}

	KERNEL_LOCK(1, NULL);
	if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb &&
	    SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
	    SEQ_LT(tp->t_pmtud_th_seq, (int)(tp->snd_una + tp->t_ourmss))) {
		extern struct sockaddr_in icmpsrc;
		struct icmp icmp;

		tp->t_flags &= ~TF_PMTUD_PEND;

		/* XXX create fake icmp message with relevant entries */
		icmp.icmp_nextmtu = tp->t_pmtud_nextmtu;
		icmp.icmp_ip.ip_len = tp->t_pmtud_ip_len;
		icmp.icmp_ip.ip_hl = tp->t_pmtud_ip_hl;
		icmpsrc.sin_addr = tp->t_inpcb->inp_faddr;
		icmp_mtudisc(&icmp, icmpsrc.sin_addr);

		/*
		 * Notify all connections to the same peer about
		 * new mss and trigger retransmit.
		 */
		in_pcbnotifyall(&tcbtable, icmpsrc.sin_addr, EMSGSIZE,
		    tcp_mtudisc);
		KERNEL_UNLOCK_ONE(NULL);
		mutex_exit(softnet_lock);
 		return;
 	}
#ifdef TCP_DEBUG
#ifdef INET
	if (tp->t_inpcb)
		so = tp->t_inpcb->inp_socket;
#endif
#ifdef INET6
	if (tp->t_in6pcb)
		so = tp->t_in6pcb->in6p_socket;
#endif
	ostate = tp->t_state;
#endif /* TCP_DEBUG */

	/*
	 * Clear the SACK scoreboard, reset FACK estimate.
	 */
	tcp_free_sackholes(tp);
	tp->snd_fack = tp->snd_una;

	/*
	 * Retransmission timer went off.  Message has not
	 * been acked within retransmit interval.  Back off
	 * to a longer retransmit interval and retransmit one segment.
	 */

	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
		tp->t_rxtshift = TCP_MAXRXTSHIFT;
		TCP_STATINC(TCP_STAT_TIMEOUTDROP);
		tp = tcp_drop(tp, tp->t_softerror ?
		    tp->t_softerror : ETIMEDOUT);
		goto out;
	}
	TCP_STATINC(TCP_STAT_REXMTTIMEO);
	rto = TCP_REXMTVAL(tp);
	if (rto < tp->t_rttmin)
		rto = tp->t_rttmin;
	TCPT_RANGESET(tp->t_rxtcur, rto * tcp_backoff[tp->t_rxtshift],
	    tp->t_rttmin, TCPTV_REXMTMAX);
	TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);

	/*
	 * If we are losing and we are trying path MTU discovery,
	 * try turning it off.  This will avoid black holes in
	 * the network which suppress or fail to send "packet
	 * too big" ICMP messages.  We should ideally do
	 * lots more sophisticated searching to find the right
	 * value here...
	 */
	if (tp->t_mtudisc && tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) {
		TCP_STATINC(TCP_STAT_PMTUBLACKHOLE);

#ifdef INET
		/* try turning PMTUD off */
		if (tp->t_inpcb)
			tp->t_mtudisc = 0;
#endif
#ifdef INET6
		/* try using IPv6 minimum MTU */
		if (tp->t_in6pcb)
			tp->t_mtudisc = 0;
#endif

		/* XXX: more sophisticated Black hole recovery code? */
	}

	/*
	 * If losing, let the lower level know and try for
	 * a better route.  Also, if we backed off this far,
	 * our srtt estimate is probably bogus.  Clobber it
	 * so we'll take the next rtt measurement as our srtt;
	 * move the current srtt into rttvar to keep the current
	 * retransmit times until then.
	 */
	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
#ifdef INET
		if (tp->t_inpcb)
			in_losing(tp->t_inpcb);
#endif
#ifdef INET6
		if (tp->t_in6pcb)
			in6_losing(tp->t_in6pcb);
#endif
		/*
		 * This operation is not described in RFC2988.  The
		 * point is to keep srtt+4*rttvar constant, so we
		 * should shift right 2 bits to divide by 4, and then
		 * shift right one bit because the storage
		 * representation of rttvar is 1/16s vs 1/32s for
		 * srtt.
		 */
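		/*
		 * Worked out: with srtt stored in 1/32 s units and rttvar in
		 * 1/16 s units, adding srtt/4 seconds to rttvar means adding
		 * (t_srtt / 32) / 4 * 16 = t_srtt / 8 storage units, i.e.
		 * t_srtt >> 3 -- which is the shift below (assuming
		 * TCP_RTT_SHIFT is 3).
		 */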
		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
		tp->t_srtt = 0;
	}
	tp->snd_nxt = tp->snd_una;
	tp->snd_high = tp->snd_max;
	/*
	 * If timing a segment in this window, stop the timer.
	 */
	tp->t_rtttime = 0;
	/*
	 * Remember if we are retransmitting a SYN, because if
	 * we do, the initial congestion window must be set
	 * to 1 segment.
	 */
	if (tp->t_state == TCPS_SYN_SENT)
		tp->t_flags |= TF_SYN_REXMT;

	/*
	 * Adjust congestion control parameters.
	 */
	tp->t_congctl->slow_retransmit(tp);

	(void) tcp_output(tp);

 out:
#ifdef TCP_DEBUG
	if (tp && so && (so->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, NULL,
		    PRU_SLOWTIMO | (TCPT_REXMT << 8));
#endif
	KERNEL_UNLOCK_ONE(NULL);
	mutex_exit(softnet_lock);
}
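
The timer handler above recomputes the retransmission timeout by multiplying the base RTO by a per-shift backoff factor and clamping the result between t_rttmin and TCPTV_REXMTMAX (that is what TCPT_RANGESET does). A self-contained sketch of that backoff-and-clamp step, with placeholder table values rather than the kernel's:

#include <stdint.h>

/* Illustrative backoff table: double per retransmission, then saturate. */
static const uint32_t backoff[] = { 1, 2, 4, 8, 16, 32, 64, 64, 64 };
#define BACKOFF_MAX (sizeof(backoff) / sizeof(backoff[0]) - 1)

static uint32_t next_rto(uint32_t base_rto, unsigned int shift,
                         uint32_t rto_min, uint32_t rto_max)
{
    uint32_t rto;

    if (shift > BACKOFF_MAX)
        shift = BACKOFF_MAX;

    rto = base_rto * backoff[shift];

    if (rto < rto_min)      /* never fire faster than the floor */
        rto = rto_min;
    if (rto > rto_max)      /* never wait longer than the ceiling */
        rto = rto_max;
    return rto;
}
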
Exemplo n.º 16
0
/*
 * This function is called upon receipt of new valid data (while not in
 * header prediction mode), and it updates the ordered list of sacks.
 */
void
tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
{
    /*
     * First reported block MUST be the most recent one.  Subsequent
     * blocks SHOULD be in the order in which they arrived at the
     * receiver.  These two conditions make the implementation fully
     * compliant with RFC 2018.
     */
    struct sackblk head_blk, saved_blks[MAX_SACK_BLKS];
    int num_head, num_saved, i;

//ScenSim-Port//    INP_WLOCK_ASSERT(tp->t_inpcb);

    /* Check arguments. */
//ScenSim-Port//    KASSERT(SEQ_LT(rcv_start, rcv_end), ("rcv_start < rcv_end"));

    /* SACK block for the received segment. */
    head_blk.start = rcv_start;
    head_blk.end = rcv_end;

    /*
     * Merge updated SACK blocks into head_blk, and save unchanged SACK
     * blocks into saved_blks[].  num_saved will have the number of the
     * saved SACK blocks.
     */
    num_saved = 0;
    for (i = 0; i < tp->rcv_numsacks; i++) {
        tcp_seq start = tp->sackblks[i].start;
        tcp_seq end = tp->sackblks[i].end;
        if (SEQ_GEQ(start, end) || SEQ_LEQ(start, tp->rcv_nxt)) {
            /*
             * Discard this SACK block.
             */
        } else if (SEQ_LEQ(head_blk.start, end) &&
               SEQ_GEQ(head_blk.end, start)) {
            /*
             * Merge this SACK block into head_blk.  This SACK
             * block itself will be discarded.
             */
            if (SEQ_GT(head_blk.start, start))
                head_blk.start = start;
            if (SEQ_LT(head_blk.end, end))
                head_blk.end = end;
        } else {
            /*
             * Save this SACK block.
             */
            saved_blks[num_saved].start = start;
            saved_blks[num_saved].end = end;
            num_saved++;
        }
    }

    /*
     * Update SACK list in tp->sackblks[].
     */
    num_head = 0;
    if (SEQ_GT(head_blk.start, tp->rcv_nxt)) {
        /*
         * The received data segment is an out-of-order segment.  Put
         * head_blk at the top of SACK list.
         */
        tp->sackblks[0] = head_blk;
        num_head = 1;
        /*
         * If the number of saved SACK blocks exceeds its limit,
         * discard the last SACK block.
         */
        if (num_saved >= MAX_SACK_BLKS)
            num_saved--;
    }
    if (num_saved > 0) {
        /*
         * Copy the saved SACK blocks back.
         */
        bcopy(saved_blks, &tp->sackblks[num_head],
              sizeof(struct sackblk) * num_saved);
    }

    /* Save the number of SACK blocks. */
    tp->rcv_numsacks = num_head + num_saved;
}
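
All of the SEQ_LT/SEQ_GT/SEQ_GEQ comparisons in this function (and the others in this collection) are wrap-safe: the difference is computed modulo 2^32 and then read as a signed value. The conventional BSD-style definitions look roughly like this (shown here for reference, not copied from any one of the sources above):

#include <stdint.h>

typedef uint32_t tcp_seq;

/* Wrap-safe sequence comparisons: the unsigned subtraction wraps modulo
 * 2^32, and the signed cast makes "a slightly behind b" come out negative
 * even when the raw values straddle the wrap point. */
#define SEQ_LT(a, b)    ((int32_t)((a) - (b)) < 0)
#define SEQ_LEQ(a, b)   ((int32_t)((a) - (b)) <= 0)
#define SEQ_GT(a, b)    ((int32_t)((a) - (b)) > 0)
#define SEQ_GEQ(a, b)   ((int32_t)((a) - (b)) >= 0)
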
Exemplo n.º 17
0
static int tcpup_state_send(struct tcpup_info *upp, struct tcpiphdr *tcp, size_t dlen)
{
	int xflags = 0;

	if (tcp->th_flags & TH_RST) {
		upp->t_state = TCPS_CLOSED;
		return 0;
	}

	if (upp->x_state != upp->t_state
			&& (tcp->th_flags & TH_ACK)
			&& SEQ_GEQ(htonl(tcp->th_ack), upp->rcv_una)) {
		tcp_state_update(upp, upp->x_state);
		upp->t_state = upp->x_state;
	}

	switch (upp->t_state) {
		case TCPS_CLOSED:
			xflags = TH_SYN | TH_ACK;
			if ((tcp->th_flags & xflags) == TH_SYN) {
				upp->snd_nxt = htonl(tcp->th_seq) + 1;
				upp->snd_max = htonl(tcp->th_seq) + 1;
				upp->snd_una = htonl(tcp->th_seq) + 1;
				tcp_state_update(upp, TCPS_SYN_SENT);
				return 0;
			}
			break;

		case TCPS_SYN_RECEIVED:
			assert((tcp->th_flags & TH_FIN) != TH_FIN);
			xflags = TH_SYN | TH_ACK;
			if ((tcp->th_flags & xflags) == TH_ACK
					&& SEQ_GT(htonl(tcp->th_seq), upp->snd_nxt)) {
				tcp_state_update(upp, upp->x_state);
				return 0;
			}
			break;

		case TCPS_ESTABLISHED:
			if ((tcp->th_flags & TH_FIN) == TH_FIN) {
				upp->snd_nxt = htonl(tcp->th_seq) + dlen + 1;
				tcp_state_update(upp, TCPS_FIN_WAIT_1);
				return 0;
			}
			break;

		case TCPS_CLOSE_WAIT:
			if ((tcp->th_flags & TH_FIN) == TH_FIN) {
				upp->snd_nxt = htonl(tcp->th_seq) + dlen + 1;
				tcp_state_update(upp, TCPS_LAST_ACK);
				return 0;
			}
			break;

		case TCPS_FIN_WAIT_1:
			xflags = TH_FIN | TH_ACK;
			if ((tcp->th_flags & xflags) == TH_ACK) {
				tcp_state_update(upp, upp->x_state);
				return 0;
			}
			break;
	}

	if (dlen > 0) {
		upp->snd_nxt = htonl(tcp->th_seq) + dlen;
		if (SEQ_GT(upp->snd_nxt, upp->snd_max)) {
			/* update snd max to nxt */
			upp->snd_max = upp->snd_nxt;
		}
	}

	return 0;
}
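
The tail of tcpup_state_send shows the usual send-side bookkeeping: advance snd_nxt past the payload that was just carried, and let snd_max track the highest sequence ever sent. A small hedged sketch of that step as a standalone helper (the names are made up for the example):

#include <stddef.h>
#include <stdint.h>

typedef uint32_t tcp_seq;
#define SEQ_GT(a, b) ((int32_t)((a) - (b)) > 0)   /* wrap-safe, as above */

/* Advance the next-send point past dlen payload bytes starting at seg_seq,
 * and keep snd_max as the high-water mark of everything sent so far. */
static void advance_send_marks(tcp_seq *snd_nxt, tcp_seq *snd_max,
                               tcp_seq seg_seq, size_t dlen)
{
    if (dlen > 0) {
        *snd_nxt = seg_seq + (tcp_seq)dlen;
        if (SEQ_GT(*snd_nxt, *snd_max))
            *snd_max = *snd_nxt;
    }
}
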
Exemplo n.º 18
0
/*
 * Process cumulative ACK and the TCP SACK option to update the scoreboard.
 * tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
 * the sequence space).
 */
void
tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
{
    struct sackhole *cur, *temp;
    struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
    int i, j, num_sack_blks;

//ScenSim-Port//    INP_WLOCK_ASSERT(tp->t_inpcb);

    num_sack_blks = 0;
    /*
     * If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist,
     * treat [SND.UNA, SEG.ACK) as if it is a SACK block.
     */
    if (SEQ_LT(tp->snd_una, th_ack) && !TAILQ_EMPTY(&tp->snd_holes)) {
        sack_blocks[num_sack_blks].start = tp->snd_una;
        sack_blocks[num_sack_blks++].end = th_ack;
    }
    /*
     * Append received valid SACK blocks to sack_blocks[], but only if we
     * received new blocks from the other side.
     */
    if (to->to_flags & TOF_SACK) {
        for (i = 0; i < to->to_nsacks; i++) {
            bcopy((to->to_sacks + i * TCPOLEN_SACK),
                &sack, sizeof(sack));
            sack.start = ntohl(sack.start);
            sack.end = ntohl(sack.end);
            if (SEQ_GT(sack.end, sack.start) &&
                SEQ_GT(sack.start, tp->snd_una) &&
                SEQ_GT(sack.start, th_ack) &&
                SEQ_LT(sack.start, tp->snd_max) &&
                SEQ_GT(sack.end, tp->snd_una) &&
                SEQ_LEQ(sack.end, tp->snd_max))
                sack_blocks[num_sack_blks++] = sack;
        }
    }
    /*
     * Return if SND.UNA is not advanced and no valid SACK block is
     * received.
     */
    if (num_sack_blks == 0)
        return;

    /*
     * Sort the SACK blocks so we can update the scoreboard with just one
     * pass. The overhead of sorting up to 4+1 elements is less than
     * making up to 4+1 passes over the scoreboard.
     */
    for (i = 0; i < num_sack_blks; i++) {
        for (j = i + 1; j < num_sack_blks; j++) {
            if (SEQ_GT(sack_blocks[i].end, sack_blocks[j].end)) {
                sack = sack_blocks[i];
                sack_blocks[i] = sack_blocks[j];
                sack_blocks[j] = sack;
            }
        }
    }
    if (TAILQ_EMPTY(&tp->snd_holes))
        /*
         * Empty scoreboard. Need to initialize snd_fack (it may be
         * uninitialized or have a bogus value). Scoreboard holes
         * (from the sack blocks received) are created later below
         * (in the logic that adds holes to the tail of the
         * scoreboard).
         */
        tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack);
    /*
     * In the while-loop below, incoming SACK blocks (sack_blocks[]) and
     * SACK holes (snd_holes) are traversed from their tails with just
     * one pass in order to reduce the number of compares especially when
     * the bandwidth-delay product is large.
     *
     * Note: Typically, in the first RTT of SACK recovery, the highest
     * three or four SACK blocks with the same ack number are received.
     * In the second RTT, if retransmitted data segments are not lost,
     * the highest three or four SACK blocks with ack number advancing
     * are received.
     */
    sblkp = &sack_blocks[num_sack_blks - 1];    /* Last SACK block */
    tp->sackhint.last_sack_ack = sblkp->end;
    if (SEQ_LT(tp->snd_fack, sblkp->start)) {
        /*
         * The highest SACK block is beyond fack.  Append new SACK
         * hole at the tail.  If the second or later highest SACK
         * blocks are also beyond the current fack, they will be
         * inserted by way of hole splitting in the while-loop below.
         */
        temp = tcp_sackhole_insert(tp, tp->snd_fack, sblkp->start, NULL);
        if (temp != NULL) {
            tp->snd_fack = sblkp->end;
            /* Go to the previous sack block. */
            sblkp--;
        } else {
            /*
             * We failed to add a new hole based on the current
             * sack block.  Skip over all the sack blocks that
             * fall completely to the right of snd_fack and
             * proceed to trim the scoreboard based on the
             * remaining sack blocks.  This also trims the
             * scoreboard for th_ack (which is sack_blocks[0]).
             */
            while (sblkp >= sack_blocks &&
                   SEQ_LT(tp->snd_fack, sblkp->start))
                sblkp--;
            if (sblkp >= sack_blocks &&
                SEQ_LT(tp->snd_fack, sblkp->end))
                tp->snd_fack = sblkp->end;
        }
    } else if (SEQ_LT(tp->snd_fack, sblkp->end))
        /* fack is advanced. */
        tp->snd_fack = sblkp->end;
    /* We must have at least one SACK hole in scoreboard. */
//ScenSim-Port//    KASSERT(!TAILQ_EMPTY(&tp->snd_holes),
//ScenSim-Port//        ("SACK scoreboard must not be empty"));
    cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole. */
    /*
     * Since the incoming sack blocks are sorted, we can process them
     * making one sweep of the scoreboard.
     */
    while (sblkp >= sack_blocks && cur != NULL) {
        if (SEQ_GEQ(sblkp->start, cur->end)) {
            /*
             * SACKs data beyond the current hole.  Go to the
             * previous sack block.
             */
            sblkp--;
            continue;
        }
        if (SEQ_LEQ(sblkp->end, cur->start)) {
            /*
             * SACKs data before the current hole.  Go to the
             * previous hole.
             */
            cur = TAILQ_PREV(cur, sackhole_head, scblink);
            continue;
        }
        tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);
//ScenSim-Port//        KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
//ScenSim-Port//            ("sackhint bytes rtx >= 0"));
        if (SEQ_LEQ(sblkp->start, cur->start)) {
            /* Data acks at least the beginning of hole. */
            if (SEQ_GEQ(sblkp->end, cur->end)) {
                /* Acks entire hole, so delete hole. */
                temp = cur;
                cur = TAILQ_PREV(cur, sackhole_head, scblink);
                tcp_sackhole_remove(tp, temp);
                /*
                 * The sack block may ack all or part of the
                 * next hole too, so continue onto the next
                 * hole.
                 */
                continue;
            } else {
                /* Move start of hole forward. */
                cur->start = sblkp->end;
                cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
            }
        } else {
            /* Data acks at least the end of hole. */
            if (SEQ_GEQ(sblkp->end, cur->end)) {
                /* Move end of hole backward. */
                cur->end = sblkp->start;
                cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
            } else {
                /*
                 * ACKs some data in middle of a hole; need
                 * to split current hole
                 */
                temp = tcp_sackhole_insert(tp, sblkp->end,
                    cur->end, cur);
                if (temp != NULL) {
                    if (SEQ_GT(cur->rxmit, temp->rxmit)) {
                        temp->rxmit = cur->rxmit;
                        tp->sackhint.sack_bytes_rexmit
                            += (temp->rxmit
                            - temp->start);
                    }
                    cur->end = sblkp->start;
                    cur->rxmit = SEQ_MIN(cur->rxmit,
                        cur->end);
                }
            }
        }
        tp->sackhint.sack_bytes_rexmit += (cur->rxmit - cur->start);
        /*
         * Testing sblkp->start against cur->start tells us whether
         * we're done with the sack block or the sack hole.
         * Accordingly, we advance one or the other.
         */
        if (SEQ_LEQ(sblkp->start, cur->start))
            cur = TAILQ_PREV(cur, sackhole_head, scblink);
        else
            sblkp--;
    }
}
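
The scoreboard that tcp_sack_doack maintains is a tail queue of holes, each of which also remembers how far retransmission has progressed inside it (the rxmit field that sack_bytes_rexmit is computed from). The record looks roughly like this in the FreeBSD-derived sources these examples are taken from; treat the sketch as indicative rather than the authoritative definition:

#include <stdint.h>
#include <sys/queue.h>

typedef uint32_t tcp_seq;

struct sackhole {
    tcp_seq start;                  /* first sequence number still missing */
    tcp_seq end;                    /* one past the last missing byte */
    tcp_seq rxmit;                  /* next byte in this hole to retransmit */
    TAILQ_ENTRY(sackhole) scblink;  /* scoreboard linkage, oldest to newest */
};

TAILQ_HEAD(sackhole_head, sackhole);
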
Exemplo n.º 19
0
enum CT_UPDATE_RES
OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
                           const TCPHdr *tcp,
                           PNET_BUFFER_LIST nbl,
                           BOOLEAN reply,
                           UINT64 now)
{
    struct conn_tcp *conn = OvsCastConntrackEntryToTcpEntry(conn_);
    /* The peer that sent 'pkt' */
    struct tcp_peer *src = &conn->peer[reply ? 1 : 0];
    /* The peer that should receive 'pkt' */
    struct tcp_peer *dst = &conn->peer[reply ? 0 : 1];
    uint8_t sws = 0, dws = 0;
    UINT16 tcp_flags = ntohs(tcp->flags);
    uint16_t win = ntohs(tcp->window);
    uint32_t ack, end, seq, orig_seq;
    uint32_t p_len = OvsGetTcpPayloadLength(nbl);
    int ackskew;

    if (OvsCtInvalidTcpFlags(tcp_flags)) {
        return CT_UPDATE_INVALID;
    }

    if (((tcp_flags & (TCP_SYN|TCP_ACK)) == TCP_SYN)
            && dst->state >= CT_DPIF_TCPS_FIN_WAIT_2
            && src->state >= CT_DPIF_TCPS_FIN_WAIT_2) {
        src->state = dst->state = CT_DPIF_TCPS_CLOSED;
        return CT_UPDATE_NEW;
    }

    if (src->wscale & CT_WSCALE_FLAG
        && dst->wscale & CT_WSCALE_FLAG
        && !(tcp_flags & TCP_SYN)) {

        sws = src->wscale & CT_WSCALE_MASK;
        dws = dst->wscale & CT_WSCALE_MASK;

    } else if (src->wscale & CT_WSCALE_UNKNOWN
        && dst->wscale & CT_WSCALE_UNKNOWN
        && !(tcp_flags & TCP_SYN)) {

        sws = TCP_MAX_WSCALE;
        dws = TCP_MAX_WSCALE;
    }

    /*
     * Sequence tracking algorithm from Guido van Rooij's paper:
     *   http://www.madison-gurkha.com/publications/tcp_filtering/
     *      tcp_filtering.ps
     */

    orig_seq = seq = ntohl(tcp->seq);
    if (src->state < CT_DPIF_TCPS_SYN_SENT) {
        /* First packet from this end. Set its state */

        ack = ntohl(tcp->ack_seq);

        end = seq + p_len;
        if (tcp_flags & TCP_SYN) {
            end++;
            if (dst->wscale & CT_WSCALE_FLAG) {
                src->wscale = OvsTcpGetWscale(tcp);
                if (src->wscale & CT_WSCALE_FLAG) {
                    /* Remove scale factor from initial window */
                    sws = src->wscale & CT_WSCALE_MASK;
                    win = DIV_ROUND_UP((uint32_t) win, 1 << sws);
                    dws = dst->wscale & CT_WSCALE_MASK;
                } else {
                    /* fixup other window */
                    dst->max_win <<= dst->wscale & CT_WSCALE_MASK;
                    /* in case of a retrans SYN|ACK */
                    dst->wscale = 0;
                }
            }
        }
        if (tcp_flags & TCP_FIN) {
            end++;
        }

        src->seqlo = seq;
        src->state = CT_DPIF_TCPS_SYN_SENT;
        /*
         * May need to slide the window (seqhi may have been set by
         * the crappy stack check or if we picked up the connection
         * after establishment)
         */
        if (src->seqhi == 1 ||
                SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) {
            src->seqhi = end + MAX(1, dst->max_win << dws);
        }
        if (win > src->max_win) {
            src->max_win = win;
        }

    } else {
        ack = ntohl(tcp->ack_seq);
        end = seq + p_len;
        if (tcp_flags & TCP_SYN) {
            end++;
        }
        if (tcp_flags & TCP_FIN) {
            end++;
        }
    }

    if ((tcp_flags & TCP_ACK) == 0) {
        /* Let it pass through the ack skew check */
        ack = dst->seqlo;
    } else if ((ack == 0
                && (tcp_flags & (TCP_ACK|TCP_RST)) == (TCP_ACK|TCP_RST))
               /* broken tcp stacks do not set ack */) {
        /* Many stacks (ours included) will set the ACK number in an
         * FIN|ACK if the SYN times out -- no sequence to ACK. */
        ack = dst->seqlo;
    }

    if (seq == end) {
        /* Ease sequencing restrictions on no data packets */
        seq = src->seqlo;
        end = seq;
    }

    ackskew = dst->seqlo - ack;
#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */
    if (SEQ_GEQ(src->seqhi, end)
        /* Last octet inside other's window space */
        && SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))
        /* Retrans: not more than one window back */
        && (ackskew >= -MAXACKWINDOW)
        /* Acking not more than one reassembled fragment backwards */
        && (ackskew <= (MAXACKWINDOW << sws))
        /* Acking not more than one window forward */
        && ((tcp_flags & TCP_RST) == 0 || orig_seq == src->seqlo
            || (orig_seq == src->seqlo + 1)
            || (orig_seq + 1 == src->seqlo))) {
        /* Require an exact/+1 sequence match on resets when possible */

        /* update max window */
        if (src->max_win < win) {
            src->max_win = win;
        }
        /* synchronize sequencing */
        if (SEQ_GT(end, src->seqlo)) {
            src->seqlo = end;
        }
        /* slide the window of what the other end can send */
        if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) {
            dst->seqhi = ack + MAX((win << sws), 1);
        }

        /* update states */
        if (tcp_flags & TCP_SYN && src->state < CT_DPIF_TCPS_SYN_SENT) {
                src->state = CT_DPIF_TCPS_SYN_SENT;
        }
        if (tcp_flags & TCP_FIN && src->state < CT_DPIF_TCPS_CLOSING) {
                src->state = CT_DPIF_TCPS_CLOSING;
        }
        if (tcp_flags & TCP_ACK) {
            if (dst->state == CT_DPIF_TCPS_SYN_SENT) {
                dst->state = CT_DPIF_TCPS_ESTABLISHED;
            } else if (dst->state == CT_DPIF_TCPS_CLOSING) {
                dst->state = CT_DPIF_TCPS_FIN_WAIT_2;
            }
        }
        if (tcp_flags & TCP_RST) {
            src->state = dst->state = CT_DPIF_TCPS_TIME_WAIT;
        }

        if (src->state >= CT_DPIF_TCPS_FIN_WAIT_2
            && dst->state >= CT_DPIF_TCPS_FIN_WAIT_2) {
            OvsConntrackUpdateExpiration(conn, now, 30 * CT_INTERVAL_SEC);
        } else if (src->state >= CT_DPIF_TCPS_CLOSING
                   && dst->state >= CT_DPIF_TCPS_CLOSING) {
            OvsConntrackUpdateExpiration(conn, now, 45 * CT_INTERVAL_SEC);
        } else if (src->state < CT_DPIF_TCPS_ESTABLISHED
                   || dst->state < CT_DPIF_TCPS_ESTABLISHED) {
            OvsConntrackUpdateExpiration(conn, now, 30 * CT_INTERVAL_SEC);
        } else if (src->state >= CT_DPIF_TCPS_CLOSING
                   || dst->state >= CT_DPIF_TCPS_CLOSING) {
            OvsConntrackUpdateExpiration(conn, now, 15 * 60 * CT_INTERVAL_SEC);
        } else {
            OvsConntrackUpdateExpiration(conn, now, 24 * 60 * 60 * CT_INTERVAL_SEC);
        }
    } else if ((dst->state < CT_DPIF_TCPS_SYN_SENT
                || dst->state >= CT_DPIF_TCPS_FIN_WAIT_2
                || src->state >= CT_DPIF_TCPS_FIN_WAIT_2)
               && SEQ_GEQ(src->seqhi + MAXACKWINDOW, end)
               /* Within a window forward of the originating packet */
               && SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
               /* Within a window backward of the originating packet */

        /*
         * This currently handles three situations:
         *  1) Stupid stacks will shotgun SYNs before their peer
         *     replies.
         *  2) When PF catches an already established stream (the
         *     firewall rebooted, the state table was flushed, routes
         *     changed...)
         *  3) Packets get funky immediately after the connection
         *     closes (this should catch Solaris spurious ACK|FINs
         *     that web servers like to spew after a close)
         *
         * This must be a little more careful than the above code
         * since packet floods will also be caught here. We don't
         * update the TTL here to mitigate the damage of a packet
         * flood and so the same code can handle awkward establishment
         * and a loosened connection close.
         * In the establishment case, a correct peer response will
         * validate the connection, go through the normal state code
         * and keep updating the state TTL.
         */

        /* update max window */
        if (src->max_win < win) {
            src->max_win = win;
        }
        /* synchronize sequencing */
        if (SEQ_GT(end, src->seqlo)) {
            src->seqlo = end;
        }
        /* slide the window of what the other end can send */
        if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) {
            dst->seqhi = ack + MAX((win << sws), 1);
        }

        /*
         * Cannot set dst->seqhi here since this could be a shotgunned
         * SYN and not an already established connection.
         */

        if (tcp_flags & TCP_FIN && src->state < CT_DPIF_TCPS_CLOSING) {
            src->state = CT_DPIF_TCPS_CLOSING;
        }

        if (tcp_flags & TCP_RST) {
            src->state = dst->state = CT_DPIF_TCPS_TIME_WAIT;
        }
    } else {
        return CT_UPDATE_INVALID;
    }

    return CT_UPDATE_VALID;
}
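
One detail above is easy to miss: the window field in a SYN is never scaled, so when the SYN carries a window-scale option the code divides that first window back down (DIV_ROUND_UP(win, 1 << sws)) and only applies the shift when later computing how far the peer may send. A small numeric illustration with invented values:

#include <stdio.h>
#include <stdint.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
    uint16_t raw_win = 29200;   /* window field taken from a SYN (unscaled) */
    uint8_t sws = 7;            /* window-scale option advertised in that SYN */

    /* Store the SYN's window divided down, rounding up so we never
     * under-estimate what the peer allowed... */
    uint32_t stored = DIV_ROUND_UP((uint32_t)raw_win, 1u << sws);

    /* ...and shift it back up whenever a scaled window is needed. */
    uint32_t effective = stored << sws;

    printf("stored=%u effective=%u\n", stored, effective);  /* 229 and 29312 */
    return 0;
}
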
Exemplo n.º 20
0
/* Callback function executed when something is received on fd */
int receive_callback(int file, void *arg) {
  char buf[sizeof(struct rudp_packet)];
  struct sockaddr_in sender;
  socklen_t sender_length = sizeof(struct sockaddr_in);
  recvfrom(file, &buf, sizeof(struct rudp_packet), 0, (struct sockaddr *)&sender, &sender_length);

  struct rudp_packet *received_packet = malloc(sizeof(struct rudp_packet));
  if(received_packet == NULL) {
    fprintf(stderr, "receive_callback: Error allocating packet\n");
    return -1;
  }
  memcpy(received_packet, &buf, sizeof(struct rudp_packet));
  
  struct rudp_hdr rudpheader = received_packet->header;
  char type[5];
  short t = rudpheader.type;
  if(t == 1)
    strcpy(type, "DATA");
  else if(t == 2)
    strcpy(type, "ACK");
  else if(t == 4)
    strcpy(type, "SYN");
  else if(t==5)
    strcpy(type, "FIN");
  else
    strcpy(type, "BAD");

  printf("Received %s packet from %s:%d seq number=%u on socket=%d\n",type, 
       inet_ntoa(sender.sin_addr), ntohs(sender.sin_port),rudpheader.seqno,file);

  /* Locate the correct socket in the socket list */
  if(socket_list_head == NULL) {
    fprintf(stderr, "Error: attempt to receive on invalid socket. No sockets in the list\n");
    return -1;
  }
  else {
    /* We have sockets to check */
    struct rudp_socket_list *curr_socket = socket_list_head;
    while(curr_socket != NULL) {
      if((int)curr_socket->rsock == file) {
        break;
      }
      curr_socket = curr_socket->next;
    }
    if(curr_socket != NULL && (int)curr_socket->rsock == file) {
      /* We found the correct socket, now see if a session already exists for this peer */
      if(curr_socket->sessions_list_head == NULL) {
        /* The list is empty, so we check if the sender has initiated the protocol properly (by sending a SYN) */
        if(rudpheader.type == RUDP_SYN) {
          /* SYN Received. Create a new session at the head of the list */
          u_int32_t seqno = rudpheader.seqno + 1;
          create_receiver_session(curr_socket, seqno, &sender);
          /* Respond with an ACK */
          struct rudp_packet *p = create_rudp_packet(RUDP_ACK, seqno, 0, NULL);
          send_packet(true, (rudp_socket_t)file, p, &sender);
          free(p);
        }
        else {
          /* No sessions exist and we got a non-SYN, so ignore it */
        }
      }
      else {
        /* Some sessions exist to be checked */
        bool_t session_found = false;
        struct session *curr_session = curr_socket->sessions_list_head;
        struct session *last_session;
        while(curr_session != NULL) {
          if(curr_session->next == NULL) {
            last_session = curr_session;
          }
          if(compare_sockaddr(&curr_session->address, &sender) == 1) {
            /* Found an existing session */
            session_found = true;
            break;
          }

          curr_session = curr_session->next;
        }
        if(session_found == false) {
          /* No session was found for this peer */
          if(rudpheader.type == RUDP_SYN) {
            /* SYN Received. Send an ACK and create a new session */
            u_int32_t seqno = rudpheader.seqno + 1;
            create_receiver_session(curr_socket, seqno, &sender);          
            struct rudp_packet *p = create_rudp_packet(RUDP_ACK, seqno, 0, NULL);
            send_packet(true, (rudp_socket_t)file, p, &sender);
            free(p);
          }
          else {
            /* Session does not exist and non-SYN received - ignore it */
          }
        }
        else {
          /* We found a matching session */ 
          if(rudpheader.type == RUDP_SYN) {
            if(curr_session->receiver == NULL || curr_session->receiver->status == OPENING) {
              /* Create a new receiver session and ACK the SYN*/
              struct receiver_session *new_receiver_session = malloc(sizeof(struct receiver_session));
              if(new_receiver_session == NULL) {
                fprintf(stderr, "receive_callback: Error allocating receiver session\n");
                return -1;
              }
              new_receiver_session->expected_seqno = rudpheader.seqno + 1;
              new_receiver_session->status = OPENING;
              new_receiver_session->session_finished = false;
              curr_session->receiver = new_receiver_session;

              u_int32_t seqno = curr_session->receiver->expected_seqno;
              struct rudp_packet *p = create_rudp_packet(RUDP_ACK, seqno, 0, NULL);
              send_packet(true, (rudp_socket_t)file, p, &sender);
              free(p);
            }
            else {
              /* Received a SYN when there is already an active receiver session, so we ignore it */
            }
          }
          if(rudpheader.type == RUDP_ACK) {
            u_int32_t ack_sqn = received_packet->header.seqno;
            if(curr_session->sender->status == SYN_SENT) {
              /* This an ACK for a SYN */
              u_int32_t syn_sqn = curr_session->sender->seqno;
              if( (ack_sqn - 1) == syn_sqn) {
                /* Delete the retransmission timeout */
                event_timeout_delete(timeout_callback, curr_session->sender->syn_timeout_arg);
                struct timeoutargs *t = (struct timeoutargs *)curr_session->sender->syn_timeout_arg;
                free(t->packet);
                free(t->recipient);
                free(t);
                curr_session->sender->status = OPEN;
                while(curr_session->sender->data_queue != NULL) {
                  /* Check if the window is already full */
                  if(curr_session->sender->sliding_window[RUDP_WINDOW-1] != NULL) {
                    break;
                  }
                  else {
                    int index;
                    int i;
                    /* Find the first unused window slot */
                    for(i = RUDP_WINDOW-1; i >= 0; i--) {
                      if(curr_session->sender->sliding_window[i] == NULL) {
                        index = i;
                      }
                    }
                    /* Send packet, add to window and remove from queue */
                    u_int32_t seqno = ++syn_sqn;
                    int len = curr_session->sender->data_queue->len;
                    char *payload = curr_session->sender->data_queue->item;
                    struct rudp_packet *datap = create_rudp_packet(RUDP_DATA, seqno, len, payload);
                    curr_session->sender->seqno += 1;
                    curr_session->sender->sliding_window[index] = datap;
                    curr_session->sender->retransmission_attempts[index] = 0;
                    struct data *temp = curr_session->sender->data_queue;
                    curr_session->sender->data_queue = curr_session->sender->data_queue->next;
                    free(temp->item);
                    free(temp);

                    send_packet(false, (rudp_socket_t)file, datap, &sender);
                  }
                }
              }
            }
            else if(curr_session->sender->status == OPEN) {
              /* This is an ACK for DATA */
              if(curr_session->sender->sliding_window[0] != NULL) {
                if(curr_session->sender->sliding_window[0]->header.seqno == (rudpheader.seqno-1)) {
                  /* Correct ACK received. Remove the first window item and shift the rest left */
                  event_timeout_delete(timeout_callback, curr_session->sender->data_timeout_arg[0]);
                  struct timeoutargs *args = (struct timeoutargs *)curr_session->sender->data_timeout_arg[0];
                  free(args->packet);
                  free(args->recipient);
                  free(args);
                  free(curr_session->sender->sliding_window[0]);

                  int i;
                  if(RUDP_WINDOW == 1) {
                    curr_session->sender->sliding_window[0] = NULL;
                    curr_session->sender->retransmission_attempts[0] = 0;
                    curr_session->sender->data_timeout_arg[0] = NULL;
                  }
                  else {
                    for(i = 0; i < RUDP_WINDOW - 1; i++) {
                      curr_session->sender->sliding_window[i] = curr_session->sender->sliding_window[i+1];
                      curr_session->sender->retransmission_attempts[i] = curr_session->sender->retransmission_attempts[i+1];
                      curr_session->sender->data_timeout_arg[i] = curr_session->sender->data_timeout_arg[i+1];

                      if(i == RUDP_WINDOW-2) {
                        curr_session->sender->sliding_window[i+1] = NULL;
                        curr_session->sender->retransmission_attempts[i+1] = 0;
                        curr_session->sender->data_timeout_arg[i+1] = NULL;
                      }
                    }
                  }

                  while(curr_session->sender->data_queue != NULL) {
                    if(curr_session->sender->sliding_window[RUDP_WINDOW-1] != NULL) {
                      break;
                    }
                    else {
                      int index;
                      int i;
                      /* Find the first unused window slot */
                      for(i = RUDP_WINDOW-1; i >= 0; i--) {
                        if(curr_session->sender->sliding_window[i] == NULL) {
                          index = i;
                        }
                      }                      
                      /* Send packet, add to window and remove from queue */
                      curr_session->sender->seqno = curr_session->sender->seqno + 1;                      
                      u_int32_t seqno = curr_session->sender->seqno;
                      int len = curr_session->sender->data_queue->len;
                      char *payload = curr_session->sender->data_queue->item;
                      struct rudp_packet *datap = create_rudp_packet(RUDP_DATA, seqno, len, payload);
                      curr_session->sender->sliding_window[index] = datap;
                      curr_session->sender->retransmission_attempts[index] = 0;
                      struct data *temp = curr_session->sender->data_queue;
                      curr_session->sender->data_queue = curr_session->sender->data_queue->next;
                      free(temp->item);
                      free(temp);
                      send_packet(false, (rudp_socket_t)file, datap, &sender);
                    }
                  }
                  if(curr_socket->close_requested) {
                    /* Can the socket be closed? */
                    struct session *head_sessions = curr_socket->sessions_list_head;
                    while(head_sessions != NULL) {
                      if(head_sessions->sender->session_finished == false) {
                        if(head_sessions->sender->data_queue == NULL &&  
                           head_sessions->sender->sliding_window[0] == NULL && 
                           head_sessions->sender->status == OPEN) {
                          head_sessions->sender->seqno += 1;                      
                          struct rudp_packet *p = create_rudp_packet(RUDP_FIN, head_sessions->sender->seqno, 0, NULL);
                          send_packet(false, (rudp_socket_t)file, p, &head_sessions->address);
                          free(p);
                          head_sessions->sender->status = FIN_SENT;
                        }
                      }
                      head_sessions = head_sessions->next;
                    }
                  }
                }
              }
            }
            else if(curr_session->sender->status == FIN_SENT) {
              /* Handle ACK for FIN */
              if( (curr_session->sender->seqno + 1) == received_packet->header.seqno) {
                event_timeout_delete(timeout_callback, curr_session->sender->fin_timeout_arg);
                struct timeoutargs *t = curr_session->sender->fin_timeout_arg;
                free(t->packet);
                free(t->recipient);
                free(t);
                curr_session->sender->session_finished = true;
                if(curr_socket->close_requested) {
                  /* See if we can close the socket */
                  struct session *head_sessions = curr_socket->sessions_list_head;
                  bool_t all_done = true;
                  while(head_sessions != NULL) {
                    if(head_sessions->sender->session_finished == false) {
                      all_done = false;
                    }
                    else if(head_sessions->receiver != NULL && head_sessions->receiver->session_finished == false) {
                      all_done = false;
                    }
                    else {
                      free(head_sessions->sender);
                      if(head_sessions->receiver) {
                        free(head_sessions->receiver);
                      }
                    }

                    struct session *temp = head_sessions;
                    head_sessions = head_sessions->next;
                    free(temp);
                  }
                  if(all_done) {
                    if(curr_socket->handler != NULL) {
                      curr_socket->handler((rudp_socket_t)file, RUDP_EVENT_CLOSED, &sender);
                      event_fd_delete(receive_callback, (rudp_socket_t)file);
                      close(file);
                      free(curr_socket);
                    }
                  }
                }
              }
              else {
                /* Received incorrect ACK for FIN - ignore it */
              }
            }
          }
          else if(rudpheader.type == RUDP_DATA) {
            /* Handle DATA packet. If the receiver is OPENING, it can transition to OPEN */
            if(curr_session->receiver->status == OPENING) {
              if(rudpheader.seqno == curr_session->receiver->expected_seqno) {
                curr_session->receiver->status = OPEN;
              }
            }

            if(rudpheader.seqno == curr_session->receiver->expected_seqno) {
              /* Sequence numbers match - ACK the data */
              u_int32_t seqno = rudpheader.seqno + 1;
              curr_session->receiver->expected_seqno = seqno;
              struct rudp_packet *p = create_rudp_packet(RUDP_ACK, seqno, 0, NULL);
  
              send_packet(true, (rudp_socket_t)file, p, &sender);
              free(p);
              
              /* Pass the data up to the application */
              if(curr_socket->recv_handler != NULL)
                curr_socket->recv_handler((rudp_socket_t)file, &sender, 
                              (void*)&received_packet->payload, received_packet->payload_length);
            }
            /* Handle the case where an ACK was lost */
            else if(SEQ_GEQ(rudpheader.seqno, (curr_session->receiver->expected_seqno - RUDP_WINDOW)) &&
                SEQ_LT(rudpheader.seqno, curr_session->receiver->expected_seqno)) {
              u_int32_t seqno = rudpheader.seqno + 1;
              struct rudp_packet *p = create_rudp_packet(RUDP_ACK, seqno, 0, NULL);
              send_packet(true, (rudp_socket_t)file, p, &sender);
              free(p);
            }
          }
          else if(rudpheader.type == RUDP_FIN) {
            if(curr_session->receiver->status == OPEN) {
              if(rudpheader.seqno == curr_session->receiver->expected_seqno) {
                /* If the FIN is correct, we can ACK it */
                u_int32_t seqno = curr_session->receiver->expected_seqno + 1;
                struct rudp_packet *p = create_rudp_packet(RUDP_ACK, seqno, 0, NULL);
                send_packet(true, (rudp_socket_t)file, p, &sender);
                free(p);
                curr_session->receiver->session_finished = true;

                if(curr_socket->close_requested) {
                  /* Can we close the socket now? */
                  struct session *head_sessions = curr_socket->sessions_list_head;
                  bool_t all_done = true;
                  while(head_sessions != NULL) {
                    if(head_sessions->sender->session_finished == false) {
                      all_done = false;
                    }
                    else if(head_sessions->receiver != NULL && head_sessions->receiver->session_finished == false) {
                      all_done = false;
                    }
                    else {
                      free(head_sessions->sender);
                      if(head_sessions->receiver) {
                        free(head_sessions->receiver);
                      }
                    }
                    
                    struct session *temp = head_sessions;
                    head_sessions = head_sessions->next;
                    free(temp);
                  }
                  if(all_done) {
                    if(curr_socket->handler != NULL) {
                      curr_socket->handler((rudp_socket_t)file, RUDP_EVENT_CLOSED, &sender);
                      event_fd_delete(receive_callback, (rudp_socket_t)file);
                      close(file);
                      free(curr_socket);
                    }
                  }
                }
              }
              else {
                /* FIN received with incorrect sequence number - ignore it */
              }
            }
          }
        }
      }
    }
  }

  free(received_packet);
  return 0;
}
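
Both refill loops in receive_callback scan the window array from the top down so that the lowest free slot wins. Pulled out as a helper, that search would look roughly like this (RUDP_WINDOW and the array-of-pointers layout come from the code above; the helper itself is illustrative):

#include <stddef.h>

struct rudp_packet;   /* only pointers are stored, so the full type is not needed here */

/* Return the lowest unused index in a window of window_size packet pointers,
 * or -1 if every slot is occupied. */
static int first_free_slot(struct rudp_packet *window[], int window_size)
{
    int index = -1;
    int i;

    for (i = window_size - 1; i >= 0; i--) {
        if (window[i] == NULL)
            index = i;   /* keep scanning downward so the lowest free index wins */
    }
    return index;
}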