/* Marks a packet lost if some packet sent later has been (s)acked.
 * The underlying idea is similar to the traditional dupthresh and FACK
 * but they look at different metrics:
 *
 * dupthresh: 3 OOO packets delivered (packet count)
 * FACK: sequence delta to highest sacked sequence (sequence space)
 * RACK: sent time delta to the latest delivered packet (time domain)
 *
 * The advantage of RACK is that it applies to both original and
 * retransmitted packets and is therefore robust against tail losses.
 * Another advantage is being more resilient to reordering by simply
 * allowing some "settling delay", instead of tweaking the dupthresh.
 *
 * The current version is only used after recovery starts but can be
 * easily extended to detect the first loss.
 */
int tcp_rack_mark_lost(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	u32 reo_wnd, prior_retrans = tp->retrans_out;

	if (inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery || !tp->rack.advanced)
		return 0;

	/* Reset the advanced flag to avoid unnecessary queue scanning */
	tp->rack.advanced = 0;

	/* To be more reordering resilient, allow min_rtt/4 settling delay
	 * (lower-bounded to 1000uS). We use min_rtt instead of the smoothed
	 * RTT because reordering is often a path property and less related
	 * to queuing or delayed ACKs.
	 *
	 * TODO: measure and adapt to the observed reordering delay, and
	 * use a timer to retransmit like the delayed early retransmit.
	 */
	reo_wnd = 1000;
	if (tp->rack.reord && tcp_min_rtt(tp) != ~0U)
		reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd);

	tcp_for_write_queue(skb, sk) {
		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);

		if (skb == tcp_send_head(sk))
			break;

		/* Skip ones already (s)acked */
		if (!after(scb->end_seq, tp->snd_una) ||
		    scb->sacked & TCPCB_SACKED_ACKED)
			continue;

		if (skb_mstamp_after(&tp->rack.mstamp, &skb->skb_mstamp)) {
			if (skb_mstamp_us_delta(&tp->rack.mstamp,
						&skb->skb_mstamp) <= reo_wnd)
				continue;

			/* skb is lost if a packet sent later has been sacked */
			tcp_skb_mark_lost_uncond_verify(tp, skb);
			if (scb->sacked & TCPCB_SACKED_RETRANS) {
				scb->sacked &= ~TCPCB_SACKED_RETRANS;
				tp->retrans_out -= tcp_skb_pcount(skb);
				NET_INC_STATS_BH(sock_net(sk),
						 LINUX_MIB_TCPLOSTRETRANSMIT);
			}
		} else if (!(scb->sacked & TCPCB_RETRANS)) {
			/* Original data are sent sequentially, so stop early
			 * b/c the rest were all sent after rack.mstamp
			 */
			break;
		}
	}
	return prior_retrans - tp->retrans_out;
}
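/* The RACK decision above reduces to a single time-domain test: a packet is
 * declared lost once some packet sent later than it has been (s)acked and the
 * send-time gap exceeds the settling window reo_wnd = max(min_rtt/4, 1000us).
 * Below is a minimal, hypothetical user-space sketch of that test (not the
 * kernel code): rack_reo_wnd(), rack_is_lost() and the plain microsecond
 * timestamps are illustrative stand-ins for the skb_mstamp helpers above.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Settling window: min_rtt/4, lower-bounded to 1000 us; ~0U means the
 * minimum RTT is not yet known, so only the 1 ms floor applies. */
static uint32_t rack_reo_wnd(uint32_t min_rtt_us, bool reord_seen)
{
	uint32_t reo_wnd = 1000;

	if (reord_seen && min_rtt_us != ~0U && (min_rtt_us >> 2) > reo_wnd)
		reo_wnd = min_rtt_us >> 2;
	return reo_wnd;
}

/* A packet is deemed lost if the most recently delivered packet was sent
 * more than reo_wnd microseconds after it. */
static bool rack_is_lost(uint64_t pkt_sent_us, uint64_t rack_sent_us,
			 uint32_t reo_wnd_us)
{
	return rack_sent_us > pkt_sent_us &&
	       rack_sent_us - pkt_sent_us > reo_wnd_us;
}

int main(void)
{
	/* 40 ms min RTT with reordering observed -> reo_wnd = 10000 us. */
	uint32_t reo_wnd = rack_reo_wnd(40000, true);

	/* Unacked packet sent at t=0, sacked packet sent at t=12000 us:
	 * the 12 ms gap exceeds reo_wnd, so the older packet is lost. */
	printf("reo_wnd = %u us, lost = %d\n", reo_wnd,
	       rack_is_lost(0, 12000, reo_wnd));
	return 0;
}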
/**
 * Cleans the meta-socket retransmission queue and the reinject-queue.
 * @meta_sk must be the metasocket.
 */
static void mptcp_clean_rtx_queue(struct sock *meta_sk, u32 prior_snd_una)
{
	struct sk_buff *skb, *tmp;
	struct tcp_sock *meta_tp = tcp_sk(meta_sk);
	struct mptcp_cb *mpcb = meta_tp->mpcb;
	int acked = 0;

	while ((skb = tcp_write_queue_head(meta_sk)) &&
	       skb != tcp_send_head(meta_sk)) {
		if (before(meta_tp->snd_una, TCP_SKB_CB(skb)->end_seq))
			break;

		tcp_unlink_write_queue(skb, meta_sk);

		if (mptcp_is_data_fin(skb)) {
			struct sock *sk_it;

			/* DATA_FIN has been acknowledged - now we can close
			 * the subflows
			 */
			mptcp_for_each_sk(mpcb, sk_it) {
				unsigned long delay = 0;

				/* If we are the passive closer, don't trigger
				 * the subflow-fin until the subflow has been
				 * finned by the peer - thus we add a delay.
				 */
				if (mpcb->passive_close &&
				    sk_it->sk_state == TCP_ESTABLISHED)
					delay = inet_csk(sk_it)->icsk_rto << 3;

				mptcp_sub_close(sk_it, delay);
			}
		}

		meta_tp->packets_out -= tcp_skb_pcount(skb);
		sk_wmem_free_skb(meta_sk, skb);

		acked = 1;
	}
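/* The loop above is a cumulative-ACK sweep at the MPTCP meta level: head
 * segments of the meta write queue are unlinked and freed as long as their
 * end_seq does not lie beyond snd_una, and an acknowledged DATA_FIN
 * additionally schedules subflow closes (delayed by 8*RTO on the
 * passive-close side). Below is a minimal, hypothetical user-space sketch of
 * the sweep itself; struct seg, seq_before() and clean_rtx_queue() are
 * illustrative stand-ins for the kernel's sk_buff queue and before() helper.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical segment in a meta-level write queue. */
struct seg {
	uint32_t end_seq;		/* sequence just past this segment */
	struct seg *next;
};

/* True if a precedes b in 32-bit wrap-around sequence space. */
static int seq_before(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;
}

/* Free every head segment fully covered by the cumulative ack snd_una;
 * returns the number of segments removed. */
static int clean_rtx_queue(struct seg **head, uint32_t snd_una)
{
	int acked = 0;

	while (*head && !seq_before(snd_una, (*head)->end_seq)) {
		struct seg *skb = *head;

		*head = skb->next;	/* unlink from the queue head */
		free(skb);
		acked++;
	}
	return acked;
}

int main(void)
{
	/* Queue: (..100], (100..200], (200..300]; cumulative ack = 200,
	 * so the first two segments are fully acked and get freed. */
	struct seg *c = malloc(sizeof(*c)); c->end_seq = 300; c->next = NULL;
	struct seg *b = malloc(sizeof(*b)); b->end_seq = 200; b->next = c;
	struct seg *a = malloc(sizeof(*a)); a->end_seq = 100; a->next = b;
	struct seg *q = a;

	printf("removed %d segments\n", clean_rtx_queue(&q, 200));

	while (q) {			/* free whatever remains queued */
		struct seg *n = q->next;
		free(q);
		q = n;
	}
	return 0;
}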