static bool tcp_fastopen_queue_check(struct sock *sk) { struct fastopen_queue *fastopenq; /* Make sure the listener has enabled fastopen, and we don't * exceed the max # of pending TFO requests allowed before trying * to validating the cookie in order to avoid burning CPU cycles * unnecessarily. * * XXX (TFO) - The implication of checking the max_qlen before * processing a cookie request is that clients can't differentiate * between qlen overflow causing Fast Open to be disabled * temporarily vs a server not supporting Fast Open at all. */ fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq; if (fastopenq->max_qlen == 0) return false; if (fastopenq->qlen >= fastopenq->max_qlen) { struct request_sock *req1; spin_lock(&fastopenq->lock); req1 = fastopenq->rskq_rst_head; if (!req1 || time_after(req1->rsk_timer.expires, jiffies)) { spin_unlock(&fastopenq->lock); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); return false; } fastopenq->rskq_rst_head = req1->dl_next; fastopenq->qlen--; spin_unlock(&fastopenq->lock); reqsk_put(req1); } return true; }
/*lint -e666*/ bool bastet_sock_recv_prepare(struct sock *sk, struct sk_buff *skb) { u8 sync_state; struct bastet_sock *bsk = sk->bastet; if (NULL == bsk) { return false; } sync_state = bsk->bastet_sock_state; switch (sync_state) { case BST_SOCK_INVALID: bsk->bastet_sock_state = BST_SOCK_UPDATING; /* Set retry true, if timeout, request sock sync */ setup_sock_sync_request_timer(sk, true); /* Ps: do not break */ case BST_SOCK_UPDATING: if (unlikely(add_rcvqueues_queue(sk, skb))) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP); kfree_skb(skb); } return true; default: return false; } }
static int ip_rcv_finish(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { const struct net_protocol *ipprot; int protocol = iph->protocol; ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && ipprot->early_demux) { ipprot->early_demux(skb); /* must reload iph, skb->head might have changed */ iph = ip_hdr(skb); } } /* * Initialise the virtual path cache for the packet. It describes * how the packet travels inside Linux networking. */ if (!skb_dst(skb)) { int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, iph->tos, skb->dev); if (unlikely(err)) { if (err == -EXDEV) NET_INC_STATS_BH(dev_net(skb->dev), LINUX_MIB_IPRPFILTER); goto drop; } } #ifdef CONFIG_IP_ROUTE_CLASSID if (unlikely(skb_dst(skb)->tclassid)) { struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); u32 idx = skb_dst(skb)->tclassid; st[idx&0xFF].o_packets++; st[idx&0xFF].o_bytes += skb->len; st[(idx>>16)&0xFF].i_packets++; st[(idx>>16)&0xFF].i_bytes += skb->len; } #endif if (iph->ihl > 5 && ip_rcv_options(skb)) goto drop; rt = skb_rtable(skb); if (rt->rt_type == RTN_MULTICAST) { IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST, skb->len); } else if (rt->rt_type == RTN_BROADCAST) IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST, skb->len); return dst_input(skb); drop: kfree_skb(skb); return NET_RX_DROP; }
static int arp_filter(__u32 sip, __u32 tip, struct net_device *dev) { #ifndef __TCS__ struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip, .saddr = tip } } }; #endif struct rtable *rt; int flag = 0; /*unsigned long now; */ #ifdef __TCS__ struct flowi fl; memset(&fl,0,sizeof(fl)); fl.nl_u.ip4_u.daddr=sip; fl.nl_u.ip4_u.saddr=tip; #endif if (ip_route_output_key(&rt, &fl) < 0) return 1; if (rt->u.dst.dev != dev) { NET_INC_STATS_BH(LINUX_MIB_ARPFILTER); flag = 1; } ip_rt_put(rt); return flag; }
/* * The three way handshake has completed - we got a valid ACK or DATAACK - * now create the new socket. * * This is the equivalent of TCP's tcp_v4_syn_recv_sock */ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { struct inet_request_sock *ireq; struct inet_sock *newinet; struct dccp_sock *newdp; struct sock *newsk; if (sk_acceptq_is_full(sk)) goto exit_overflow; if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) goto exit; newsk = dccp_create_openreq_child(sk, req, skb); if (newsk == NULL) goto exit; sk_setup_caps(newsk, dst); newdp = dccp_sk(newsk); newinet = inet_sk(newsk); ireq = inet_rsk(req); newinet->daddr = ireq->rmt_addr; newinet->rcv_saddr = ireq->loc_addr; newinet->saddr = ireq->loc_addr; newinet->opt = ireq->opt; ireq->opt = NULL; newinet->mc_index = inet_iif(skb); newinet->mc_ttl = skb->nh.iph->ttl; newinet->id = jiffies; dccp_sync_mss(newsk, dst_mtu(dst)); __inet_hash_nolisten(newsk); __inet_inherit_port(sk, newsk); return newsk; exit_overflow: NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); exit: NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); dst_release(dst); return NULL; }
/* Returns true if we should perform Fast Open on the SYN. The cookie (foc) * may be updated and return the client in the SYN-ACK later. E.g., Fast Open * cookie request (foc->len == 0). */ bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc, struct dst_entry *dst) { struct tcp_fastopen_cookie valid_foc = { .len = -1 }; bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && (syn_data || foc->len >= 0) && tcp_fastopen_queue_check(sk))) { foc->len = -1; return false; } if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD)) goto fastopen; if (tcp_fastopen_cookie_gen1(req, skb, &valid_foc) && foc->len == TCP_FASTOPEN_COOKIE_SIZE && foc->len == valid_foc.len && !memcmp(foc->val, valid_foc.val, foc->len)) { /* Cookie is valid. Create a (full) child socket to accept * the data in SYN before returning a SYN-ACK to ack the * data. If we fail to create the socket, fall back and * ack the ISN only but includes the same cookie. * * Note: Data-less SYN with valid cookie is allowed to send * data in SYN_RECV state. */ fastopen: if (tcp_fastopen_create_child(sk, skb, dst, req)) { foc->len = -1; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE); return true; } } NET_INC_STATS_BH(sock_net(sk), foc->len ? LINUX_MIB_TCPFASTOPENPASSIVEFAIL : LINUX_MIB_TCPFASTOPENCOOKIEREQD); *foc = valid_foc; return false; } EXPORT_SYMBOL(tcp_try_fastopen);
/* Marks a packet lost, if some packet sent later has been (s)acked. * The underlying idea is similar to the traditional dupthresh and FACK * but they look at different metrics: * * dupthresh: 3 OOO packets delivered (packet count) * FACK: sequence delta to highest sacked sequence (sequence space) * RACK: sent time delta to the latest delivered packet (time domain) * * The advantage of RACK is it applies to both original and retransmitted * packet and therefore is robust against tail losses. Another advantage * is being more resilient to reordering by simply allowing some * "settling delay", instead of tweaking the dupthresh. * * The current version is only used after recovery starts but can be * easily extended to detect the first loss. */ int tcp_rack_mark_lost(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; u32 reo_wnd, prior_retrans = tp->retrans_out; if (inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery || !tp->rack.advanced) return 0; /* Reset the advanced flag to avoid unnecessary queue scanning */ tp->rack.advanced = 0; /* To be more reordering resilient, allow min_rtt/4 settling delay * (lower-bounded to 1000uS). We use min_rtt instead of the smoothed * RTT because reordering is often a path property and less related * to queuing or delayed ACKs. * * TODO: measure and adapt to the observed reordering delay, and * use a timer to retransmit like the delayed early retransmit. */ reo_wnd = 1000; if (tp->rack.reord && tcp_min_rtt(tp) != ~0U) reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd); tcp_for_write_queue(skb, sk) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); if (skb == tcp_send_head(sk)) break; /* Skip ones already (s)acked */ if (!after(scb->end_seq, tp->snd_una) || scb->sacked & TCPCB_SACKED_ACKED) continue; if (skb_mstamp_after(&tp->rack.mstamp, &skb->skb_mstamp)) { if (skb_mstamp_us_delta(&tp->rack.mstamp, &skb->skb_mstamp) <= reo_wnd) continue; /* skb is lost if packet sent later is sacked */ tcp_skb_mark_lost_uncond_verify(tp, skb); if (scb->sacked & TCPCB_SACKED_RETRANS) { scb->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); } } else if (!(scb->sacked & TCPCB_RETRANS)) { /* Original data are sent sequentially so stop early * b/c the rest are all sent after rack_sent */ break; } } return prior_retrans - tp->retrans_out; }
static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) { const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; const struct ipv6hdr *ip6h = ipv6_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); int genhash; u8 newhash[16]; hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr); hash_location = tcp_parse_md5sig_option(th); if (!hash_expected && !hash_location) return 0; if (hash_expected && !hash_location) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); return 1; } if (!hash_expected && hash_location) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); return 1; } genhash = tcp_v6_md5_hash_skb(newhash, hash_expected, NULL, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { if (net_ratelimit()) { printk(KERN_INFO "MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n", genhash ? "failed" : "mismatch", &ip6h->saddr, ntohs(th->source), &ip6h->daddr, ntohs(th->dest)); } return 1; } return 0; }
int ip_rcv_finish(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; /* * Initialise the virtual path cache for the packet. It describes * how the packet travels inside Linux networking. */ if (skb_dst(skb) == NULL) { int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, iph->tos, skb->dev); if (unlikely(err)) { if (err == -EHOSTUNREACH) IP_INC_STATS_BH(dev_net(skb->dev), IPSTATS_MIB_INADDRERRORS); else if (err == -ENETUNREACH) IP_INC_STATS_BH(dev_net(skb->dev), IPSTATS_MIB_INNOROUTES); else if (err == -EXDEV) NET_INC_STATS_BH(dev_net(skb->dev), LINUX_MIB_IPRPFILTER); goto drop; } } #ifdef CONFIG_IP_ROUTE_CLASSID if (unlikely(skb_dst(skb)->tclassid)) { struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); u32 idx = skb_dst(skb)->tclassid; st[idx&0xFF].o_packets++; st[idx&0xFF].o_bytes += skb->len; st[(idx>>16)&0xFF].i_packets++; st[(idx>>16)&0xFF].i_bytes += skb->len; } #endif if (iph->ihl > 5 && ip_rcv_options(skb)) goto drop; rt = skb_rtable(skb); if (rt->rt_type == RTN_MULTICAST) { IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST, skb->len); } else if (rt->rt_type == RTN_BROADCAST) IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST, skb->len); return dst_input(skb); drop: kfree_skb(skb); return NET_RX_DROP; }
static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip, .saddr = tip } } }; struct rtable *rt; int flag = 0; /*unsigned long now; */ if (ip_route_output_key(dev_net(dev), &rt, &fl) < 0) return 1; if (rt->u.dst.dev != dev) { NET_INC_STATS_BH(LINUX_MIB_ARPFILTER); flag = 1; } ip_rt_put(rt); return flag; }
static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) { struct rtable *rt; int flag = 0; struct net *net = dev_net(dev); rt = ip_route_output(net, sip, tip, 0, 0); if (IS_ERR(rt)) return 1; if (rt->dst.dev != dev) { NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER); flag = 1; } ip_rt_put(rt); return flag; }
void dccp_req_err(struct sock *sk, u64 seq) { struct request_sock *req = inet_reqsk(sk); struct net *net = sock_net(sk); /* * ICMPs are not backlogged, hence we cannot get an established * socket here. */ if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); } else { /* * Still in RESPOND, just remove it silently. * There is no good way to pass the error to the newly * created socket, and POSIX does not want network * errors returned from accept(). */ inet_csk_reqsk_queue_drop(req->rsk_listener, req); } reqsk_put(req); }
/* * This routine is called by the ICMP module when it gets some sort of error * condition. If err < 0 then the socket should be closed and the error * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code. * After adjustment header points to the first 8 bytes of the tcp header. We * need to find the appropriate port. * * The locking strategy used here is very "optimistic". When someone else * accesses the socket the ICMP is just dropped and for some paths there is no * check at all. A more general error queue to queue errors for later handling * is probably better. */ static void dccp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (struct iphdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + (iph->ihl << 2)); struct dccp_sock *dp; struct inet_sock *inet; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct sock *sk; __u64 seq; int err; if (skb->len < (iph->ihl << 2) + 8) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; } sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport, iph->saddr, dh->dccph_sport, inet_iif(skb)); if (sk == NULL) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; } if (sk->sk_state == DCCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); return; } bh_lock_sock(sk); /* If too many ICMPs get dropped on busy * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; dp = dccp_sk(sk); seq = dccp_hdr_seq(skb); if (sk->sk_state != DCCP_LISTEN && !between48(seq, dp->dccps_swl, dp->dccps_swh)) { NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); goto out; } switch (type) { case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ goto out; case ICMP_PARAMETERPROB: err = EPROTO; break; case ICMP_DEST_UNREACH: if (code > NR_ICMP_UNREACH) goto out; if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ if (!sock_owned_by_user(sk)) dccp_do_pmtu_discovery(sk, iph, info); goto out; } err = icmp_err_convert[code].errno; break; case ICMP_TIME_EXCEEDED: err = EHOSTUNREACH; break; default: goto out; } switch (sk->sk_state) { struct request_sock *req , **prev; case DCCP_LISTEN: if (sock_owned_by_user(sk)) goto out; req = inet_csk_search_req(sk, &prev, dh->dccph_dport, iph->daddr, iph->saddr); if (!req) goto out; /* * ICMPs are not backlogged, hence we cannot get an established * socket here. */ WARN_ON(req->sk); if (seq != dccp_rsk(req)->dreq_iss) { NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); goto out; } /* * Still in RESPOND, just remove it silently. * There is no good way to pass the error to the newly * created socket, and POSIX does not want network * errors returned from accept(). */ inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case DCCP_REQUESTING: case DCCP_RESPOND: if (!sock_owned_by_user(sk)) { DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); sk->sk_err = err; sk->sk_error_report(sk); dccp_done(sk); } else sk->sk_err_soft = err; goto out; } /* If we've already connected we will keep trying * until we time out, or the user gives up. * * rfc1122 4.2.3.9 allows to consider as hard errors * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, * but it is obsoleted by pmtu discovery). * * Note, that in modern internet, where routing is unreliable * and in each dark corner broken firewalls sit, sending random * errors ordered by their masters even this two messages finally lose * their original sense (even Linux sends invalid PORT_UNREACHs) * * Now we are in compliance with RFCs. * --ANK (980905) */ inet = inet_sk(sk); if (!sock_owned_by_user(sk) && inet->recverr) { sk->sk_err = err; sk->sk_error_report(sk); } else /* Only an error on timeout */ sk->sk_err_soft = err; out: bh_unlock_sock(sk); sock_put(sk); }
static void dccp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (struct iphdr *)skb->data; const u8 offset = iph->ihl << 2; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); struct dccp_sock *dp; struct inet_sock *inet; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct sock *sk; __u64 seq; int err; struct net *net = dev_net(skb->dev); if (skb->len < offset + sizeof(*dh) || skb->len < offset + __dccp_basic_hdr_len(dh)) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } sk = inet_lookup(net, &dccp_hashinfo, iph->daddr, dh->dccph_dport, iph->saddr, dh->dccph_sport, inet_iif(skb)); if (sk == NULL) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } if (sk->sk_state == DCCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); return; } bh_lock_sock(sk); if (sock_owned_by_user(sk)) NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; dp = dccp_sk(sk); seq = dccp_hdr_seq(dh); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_awl, dp->dccps_awh)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } switch (type) { case ICMP_SOURCE_QUENCH: goto out; case ICMP_PARAMETERPROB: err = EPROTO; break; case ICMP_DEST_UNREACH: if (code > NR_ICMP_UNREACH) goto out; if (code == ICMP_FRAG_NEEDED) { if (!sock_owned_by_user(sk)) dccp_do_pmtu_discovery(sk, iph, info); goto out; } err = icmp_err_convert[code].errno; break; case ICMP_TIME_EXCEEDED: err = EHOSTUNREACH; break; default: goto out; } switch (sk->sk_state) { struct request_sock *req , **prev; case DCCP_LISTEN: if (sock_owned_by_user(sk)) goto out; req = inet_csk_search_req(sk, &prev, dh->dccph_dport, iph->daddr, iph->saddr); if (!req) goto out; WARN_ON(req->sk); if (seq != dccp_rsk(req)->dreq_iss) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case DCCP_REQUESTING: case DCCP_RESPOND: if (!sock_owned_by_user(sk)) { DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); sk->sk_err = err; sk->sk_error_report(sk); dccp_done(sk); } else sk->sk_err_soft = err; goto out; } inet = inet_sk(sk); if (!sock_owned_by_user(sk) && inet->recverr) { sk->sk_err = err; sk->sk_error_report(sk); } else sk->sk_err_soft = err; out: bh_unlock_sock(sk); sock_put(sk); }
static struct sock *dccp_v6_request_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { struct inet6_request_sock *ireq6 = inet6_rsk(req); struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct inet_sock *newinet; struct dccp_sock *newdp; struct dccp6_sock *newdp6; struct sock *newsk; struct ipv6_txoptions *opt; if (skb->protocol == htons(ETH_P_IP)) { /* * v6 mapped */ newsk = dccp_v4_request_recv_sock(sk, skb, req, dst); if (newsk == NULL) return NULL; newdp6 = (struct dccp6_sock *)newsk; newdp = dccp_sk(newsk); newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF), newinet->daddr); ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF), newinet->saddr); ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped; newsk->sk_backlog_rcv = dccp_v4_do_rcv; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count * here, dccp_create_openreq_child now does this for us, see the comment in * that function for the gory details. -acme */ /* It is tricky place. Until this moment IPv4 tcp worked with IPv6 icsk.icsk_af_ops. Sync it now. */ dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); return newsk; } opt = np->opt; if (sk_acceptq_is_full(sk)) goto out_overflow; if (dst == NULL) { struct in6_addr *final_p = NULL, final; struct flowi fl; memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); if (opt != NULL && opt->srcrt != NULL) { const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt; ipv6_addr_copy(&final, &fl.fl6_dst); ipv6_addr_copy(&fl.fl6_dst, rt0->addr); final_p = &final; } ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_rsk(req)->loc_port; security_sk_classify_flow(sk, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out; if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto out; } newsk = dccp_create_openreq_child(sk, req, skb); if (newsk == NULL) goto out; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks * count here, dccp_create_openreq_child now does this for us, see the * comment in that function for the gory details. -acme */ __ip6_dst_store(newsk, dst, NULL, NULL); newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); if (!sysctl_tcp_use_sg) newsk->sk_route_caps &= ~NETIF_F_SG; newdp6 = (struct dccp6_sock *)newsk; newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; newdp = dccp_sk(newsk); newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); ipv6_addr_copy(&newnp->daddr, &ireq6->rmt_addr); ipv6_addr_copy(&newnp->saddr, &ireq6->loc_addr); ipv6_addr_copy(&newnp->rcv_saddr, &ireq6->loc_addr); newsk->sk_bound_dev_if = ireq6->iif; /* Now IPv6 options... First: no IPv4 options. */ newinet->opt = NULL; /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; if (ireq6->pktopts != NULL) { newnp->pktoptions = skb_clone(ireq6->pktopts, GFP_ATOMIC); kfree_skb(ireq6->pktopts); ireq6->pktopts = NULL; if (newnp->pktoptions) skb_set_owner_r(newnp->pktoptions, newsk); } newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; /* * Clone native IPv6 options from listening socket (if any) * * Yes, keeping reference count would be much more clever, but we make * one more one thing there: reattach optmem to newsk. */ if (opt != NULL) { newnp->opt = ipv6_dup_options(newsk, opt); if (opt != np->opt) sock_kfree_s(sk, opt, opt->tot_len); } inet_csk(newsk)->icsk_ext_hdr_len = 0; if (newnp->opt != NULL) inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + newnp->opt->opt_flen); dccp_sync_mss(newsk, dst_mtu(dst)); newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; __inet6_hash(newsk); __inet_inherit_port(sk, newsk); return newsk; out_overflow: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); out: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); if (opt != NULL && opt != np->opt) sock_kfree_s(sk, opt, opt->tot_len); dst_release(dst); return NULL; }
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __u32 info) { struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); struct ipv6_pinfo *np; struct sock *sk; int err; struct tcp_sock *tp; __u32 seq; sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex); if (sk == NULL) { ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; } if (sk->sk_state == TCP_TIME_WAIT) { inet_twsk_put((struct inet_timewait_sock *)sk); return; } bh_lock_sock(sk); if (sock_owned_by_user(sk)) NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == TCP_CLOSE) goto out; tp = tcp_sk(sk); seq = ntohl(th->seq); if (sk->sk_state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) { NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); goto out; } np = inet6_sk(sk); if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst = NULL; if (sock_owned_by_user(sk)) goto out; if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) goto out; /* icmp should have updated the destination cache entry */ dst = __sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); struct flowi fl; /* BUGGG_FUTURE: Again, it is not clear how to handle rthdr case. Ignore this complexity for now. */ memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; if ((err = ip6_dst_lookup(sk, &dst, &fl))) { sk->sk_err_soft = -err; goto out; } if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { sk->sk_err_soft = -err; goto out; } } else dst_hold(dst); if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); } /* else let the usual retransmit timer handle it */ dst_release(dst); goto out; } icmpv6_err_convert(type, code, &err); /* Might be for an request_sock */ switch (sk->sk_state) { struct request_sock *req, **prev; case TCP_LISTEN: if (sock_owned_by_user(sk)) goto out; req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr, &hdr->saddr, inet6_iif(skb)); if (!req) goto out; /* ICMPs are not backlogged, hence we cannot get * an established socket here. */ BUG_TRAP(req->sk == NULL); if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); goto out; } inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case TCP_SYN_SENT: case TCP_SYN_RECV: /* Cannot happen. It can, it SYNs are crossed. --ANK */ if (!sock_owned_by_user(sk)) { sk->sk_err = err; sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ tcp_done(sk); } else sk->sk_err_soft = err; goto out; } if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk->sk_error_report(sk); } else sk->sk_err_soft = err; out: bh_unlock_sock(sk); sock_put(sk); }
static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); struct dccp_sock *dp; struct ipv6_pinfo *np; struct sock *sk; int err; __u64 seq; struct net *net = dev_net(skb->dev); if (skb->len < offset + sizeof(*dh) || skb->len < offset + __dccp_basic_hdr_len(dh)) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; } sk = inet6_lookup(net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); if (sk == NULL) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; } if (sk->sk_state == DCCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); return; } bh_lock_sock(sk); if (sock_owned_by_user(sk)) NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; dp = dccp_sk(sk); seq = dccp_hdr_seq(dh); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_awl, dp->dccps_awh)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } np = inet6_sk(sk); if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst = NULL; if (sock_owned_by_user(sk)) goto out; if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED)) goto out; /* icmp should have updated the destination cache entry */ dst = __sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); struct flowi fl; /* BUGGG_FUTURE: Again, it is not clear how to handle rthdr case. Ignore this complexity for now. */ memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; security_sk_classify_flow(sk, &fl); err = ip6_dst_lookup(sk, &dst, &fl); if (err) { sk->sk_err_soft = -err; goto out; } err = xfrm_lookup(net, &dst, &fl, sk, 0); if (err < 0) { sk->sk_err_soft = -err; goto out; } } else dst_hold(dst); if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { dccp_sync_mss(sk, dst_mtu(dst)); } /* else let the usual retransmit timer handle it */ dst_release(dst); goto out; } icmpv6_err_convert(type, code, &err); /* Might be for an request_sock */ switch (sk->sk_state) { struct request_sock *req, **prev; case DCCP_LISTEN: if (sock_owned_by_user(sk)) goto out; req = inet6_csk_search_req(sk, &prev, dh->dccph_dport, &hdr->daddr, &hdr->saddr, inet6_iif(skb)); if (req == NULL) goto out; /* * ICMPs are not backlogged, hence we cannot get an established * socket here. */ WARN_ON(req->sk != NULL); if (seq != dccp_rsk(req)->dreq_iss) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case DCCP_REQUESTING: case DCCP_RESPOND: /* Cannot happen. It can, it SYNs are crossed. --ANK */ if (!sock_owned_by_user(sk)) { DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); sk->sk_err = err; /* * Wake people up to see the error * (see connect in sock.c) */ sk->sk_error_report(sk); dccp_done(sk); } else sk->sk_err_soft = err; goto out; } if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk->sk_error_report(sk); } else sk->sk_err_soft = err; out: bh_unlock_sock(sk); sock_put(sk); }
/** * This is main body of the socket close function in Sync Sockets. * * inet_release() can sleep (as well as tcp_close()), so we make our own * non-sleepable socket closing. * * This function must be used only for data sockets. * Use standard sock_release() for listening sockets. * * In most cases it is called in softirq context and from ksoftirqd which * processes data from the socket (RSS and RPS distribute packets that way). * * Note: it used to be called in process context as well, at the time when * Tempesta starts or stops. That's not the case right now, but it may change. * * TODO In some cases we need to close socket agresively w/o FIN_WAIT_2 state, * e.g. by sending RST. So we need to add second parameter to the function * which says how to close the socket. * One of the examples is rcl_req_limit() (it should reset connections). * See tcp_sk(sk)->linger2 processing in standard tcp_close(). * * Called with locked socket. */ static void ss_do_close(struct sock *sk) { struct sk_buff *skb; int data_was_unread = 0; int state; if (unlikely(!sk)) return; SS_DBG("Close socket %p (%s): cpu=%d account=%d refcnt=%d\n", sk, ss_statename[sk->sk_state], smp_processor_id(), sk_has_account(sk), atomic_read(&sk->sk_refcnt)); assert_spin_locked(&sk->sk_lock.slock); ss_sock_cpu_check(sk); BUG_ON(sk->sk_state == TCP_LISTEN); /* We must return immediately, so LINGER option is meaningless. */ WARN_ON(sock_flag(sk, SOCK_LINGER)); /* We don't support virtual containers, so TCP_REPAIR is prohibited. */ WARN_ON(tcp_sk(sk)->repair); /* The socket must have atomic allocation mask. */ WARN_ON(!(sk->sk_allocation & GFP_ATOMIC)); /* The below is mostly copy-paste from tcp_close(). */ sk->sk_shutdown = SHUTDOWN_MASK; while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq - tcp_hdr(skb)->fin; data_was_unread += len; SS_DBG("free rcv skb %p\n", skb); __kfree_skb(skb); } sk_mem_reclaim(sk); if (sk->sk_state == TCP_CLOSE) goto adjudge_to_death; if (data_was_unread) { NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, sk->sk_allocation); } else if (tcp_close_state(sk)) { /* The code below is taken from tcp_send_fin(). */ struct tcp_sock *tp = tcp_sk(sk); int mss_now = tcp_current_mss(sk); skb = tcp_write_queue_tail(sk); if (tcp_send_head(sk) != NULL) { /* Send FIN with data if we have any. */ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; TCP_SKB_CB(skb)->end_seq++; tp->write_seq++; } else { /* No data to send in the socket, allocate new skb. */ skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation); if (!skb) { SS_WARN("can't send FIN due to bad alloc"); } else { skb_reserve(skb, MAX_TCP_HEADER); tcp_init_nondata_skb(skb, tp->write_seq, TCPHDR_ACK | TCPHDR_FIN); tcp_queue_skb(sk, skb); } } __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); } adjudge_to_death: state = sk->sk_state; sock_hold(sk); sock_orphan(sk); /* * SS sockets are processed in softirq only, * so backlog queue should be empty. */ WARN_ON(sk->sk_backlog.tail); percpu_counter_inc(sk->sk_prot->orphan_count); if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE) return; if (sk->sk_state == TCP_FIN_WAIT2) { const int tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); } else { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); return; } } if (sk->sk_state != TCP_CLOSE) { sk_mem_reclaim(sk); if (tcp_check_oom(sk, 0)) { tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_ATOMIC); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); } } if (sk->sk_state == TCP_CLOSE) { struct request_sock *req = tcp_sk(sk)->fastopen_rsk; if (req != NULL) reqsk_fastopen_remove(sk, req, false); inet_csk_destroy_sock(sk); } }
static struct sock *dccp_v6_request_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct inet_sock *newinet; struct dccp6_sock *newdp6; struct sock *newsk; if (skb->protocol == htons(ETH_P_IP)) { /* * v6 mapped */ newsk = dccp_v4_request_recv_sock(sk, skb, req, dst); if (newsk == NULL) return NULL; newdp6 = (struct dccp6_sock *)newsk; newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr); ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); newsk->sk_v6_rcv_saddr = newnp->saddr; inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped; newsk->sk_backlog_rcv = dccp_v4_do_rcv; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count * here, dccp_create_openreq_child now does this for us, see the comment in * that function for the gory details. -acme */ /* It is tricky place. Until this moment IPv4 tcp worked with IPv6 icsk.icsk_af_ops. Sync it now. */ dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); return newsk; } if (sk_acceptq_is_full(sk)) goto out_overflow; if (dst == NULL) { struct in6_addr *final_p, final; struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_DCCP; fl6.daddr = ireq->ir_v6_rmt_addr; final_p = fl6_update_dst(&fl6, np->opt, &final); fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = htons(ireq->ir_num); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) goto out; } newsk = dccp_create_openreq_child(sk, req, skb); if (newsk == NULL) goto out_nonewsk; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks * count here, dccp_create_openreq_child now does this for us, see the * comment in that function for the gory details. -acme */ __ip6_dst_store(newsk, dst, NULL, NULL); newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newdp6 = (struct dccp6_sock *)newsk; newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; newnp->saddr = ireq->ir_v6_loc_addr; newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; newsk->sk_bound_dev_if = ireq->ir_iif; /* Now IPv6 options... First: no IPv4 options. */ newinet->inet_opt = NULL; /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; if (ireq->pktopts != NULL) { newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); consume_skb(ireq->pktopts); ireq->pktopts = NULL; if (newnp->pktoptions) skb_set_owner_r(newnp->pktoptions, newsk); } newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; /* * Clone native IPv6 options from listening socket (if any) * * Yes, keeping reference count would be much more clever, but we make * one more one thing there: reattach optmem to newsk. */ if (np->opt != NULL) newnp->opt = ipv6_dup_options(newsk, np->opt); inet_csk(newsk)->icsk_ext_hdr_len = 0; if (newnp->opt != NULL) inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + newnp->opt->opt_flen); dccp_sync_mss(newsk, dst_mtu(dst)); newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; newinet->inet_rcv_saddr = LOOPBACK4_IPV6; if (__inet_inherit_port(sk, newsk) < 0) { inet_csk_prepare_forced_close(newsk); dccp_done(newsk); goto out; } __inet6_hash(newsk, NULL); return newsk; out_overflow: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); out_nonewsk: dst_release(dst); out: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; }
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr = (const struct ipv6hdr*)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); struct ipv6_pinfo *np; struct sock *sk; int err; struct tcp_sock *tp; __u32 seq; struct net *net = dev_net(skb->dev); sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex); if (sk == NULL) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; } if (sk->sk_state == TCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); return; } bh_lock_sock(sk); if (sock_owned_by_user(sk)) NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == TCP_CLOSE) goto out; if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) { NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); goto out; } tp = tcp_sk(sk); seq = ntohl(th->seq); if (sk->sk_state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } np = inet6_sk(sk); if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst; if (sock_owned_by_user(sk)) goto out; if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) goto out; dst = __sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = np->daddr; fl6.saddr = np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; fl6.flowi6_uid = sock_i_uid(sk); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); if (IS_ERR(dst)) { sk->sk_err_soft = -PTR_ERR(dst); goto out; } } else dst_hold(dst); if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); } dst_release(dst); goto out; } icmpv6_err_convert(type, code, &err); switch (sk->sk_state) { struct request_sock *req, **prev; case TCP_LISTEN: if (sock_owned_by_user(sk)) goto out; req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr, &hdr->saddr, inet6_iif(skb)); if (!req) goto out; WARN_ON(req->sk != NULL); if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case TCP_SYN_SENT: case TCP_SYN_RECV: if (!sock_owned_by_user(sk)) { sk->sk_err = err; sk->sk_error_report(sk); tcp_done(sk); } else sk->sk_err_soft = err; goto out; } if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk->sk_error_report(sk); } else sk->sk_err_soft = err; out: bh_unlock_sock(sk); sock_put(sk); }
static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); struct dccp_sock *dp; struct ipv6_pinfo *np; struct sock *sk; int err; __u64 seq; struct net *net = dev_net(skb->dev); if (skb->len < offset + sizeof(*dh) || skb->len < offset + __dccp_basic_hdr_len(dh)) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; } sk = inet6_lookup(net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); if (sk == NULL) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; } if (sk->sk_state == DCCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); return; } bh_lock_sock(sk); if (sock_owned_by_user(sk)) NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; dp = dccp_sk(sk); seq = dccp_hdr_seq(dh); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_awl, dp->dccps_awh)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } np = inet6_sk(sk); if (type == NDISC_REDIRECT) { struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); if (dst) dst->ops->redirect(dst, sk, skb); goto out; } if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst = NULL; if (!ip6_sk_accept_pmtu(sk)) goto out; if (sock_owned_by_user(sk)) goto out; if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED)) goto out; dst = inet6_csk_update_pmtu(sk, ntohl(info)); if (!dst) goto out; if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) dccp_sync_mss(sk, dst_mtu(dst)); goto out; } icmpv6_err_convert(type, code, &err); /* Might be for an request_sock */ switch (sk->sk_state) { struct request_sock *req, **prev; case DCCP_LISTEN: if (sock_owned_by_user(sk)) goto out; req = inet6_csk_search_req(sk, &prev, dh->dccph_dport, &hdr->daddr, &hdr->saddr, inet6_iif(skb)); if (req == NULL) goto out; /* * ICMPs are not backlogged, hence we cannot get an established * socket here. */ WARN_ON(req->sk != NULL); if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case DCCP_REQUESTING: case DCCP_RESPOND: /* Cannot happen. It can, it SYNs are crossed. --ANK */ if (!sock_owned_by_user(sk)) { DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); sk->sk_err = err; /* * Wake people up to see the error * (see connect in sock.c) */ sk->sk_error_report(sk); dccp_done(sk); } else sk->sk_err_soft = err; goto out; } if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk->sk_error_report(sk); } else sk->sk_err_soft = err; out: bh_unlock_sock(sk); sock_put(sk); }
/* * This routine is called by the ICMP module when it gets some sort of error * condition. If err < 0 then the socket should be closed and the error * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code. * After adjustment header points to the first 8 bytes of the tcp header. We * need to find the appropriate port. * * The locking strategy used here is very "optimistic". When someone else * accesses the socket the ICMP is just dropped and for some paths there is no * check at all. A more general error queue to queue errors for later handling * is probably better. */ static void dccp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (struct iphdr *)skb->data; const u8 offset = iph->ihl << 2; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); struct dccp_sock *dp; struct inet_sock *inet; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct sock *sk; __u64 seq; int err; struct net *net = dev_net(skb->dev); if (skb->len < offset + sizeof(*dh) || skb->len < offset + __dccp_basic_hdr_len(dh)) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } sk = __inet_lookup_established(net, &dccp_hashinfo, iph->daddr, dh->dccph_dport, iph->saddr, ntohs(dh->dccph_sport), inet_iif(skb)); if (!sk) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } if (sk->sk_state == DCCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); return; } seq = dccp_hdr_seq(dh); if (sk->sk_state == DCCP_NEW_SYN_RECV) return dccp_req_err(sk, seq); bh_lock_sock(sk); /* If too many ICMPs get dropped on busy * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; dp = dccp_sk(sk); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_awl, dp->dccps_awh)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } switch (type) { case ICMP_REDIRECT: dccp_do_redirect(skb, sk); goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ goto out; case ICMP_PARAMETERPROB: err = EPROTO; break; case ICMP_DEST_UNREACH: if (code > NR_ICMP_UNREACH) goto out; if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ if (!sock_owned_by_user(sk)) dccp_do_pmtu_discovery(sk, iph, info); goto out; } err = icmp_err_convert[code].errno; break; case ICMP_TIME_EXCEEDED: err = EHOSTUNREACH; break; default: goto out; } switch (sk->sk_state) { case DCCP_REQUESTING: case DCCP_RESPOND: if (!sock_owned_by_user(sk)) { DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); sk->sk_err = err; sk->sk_error_report(sk); dccp_done(sk); } else sk->sk_err_soft = err; goto out; } /* If we've already connected we will keep trying * until we time out, or the user gives up. * * rfc1122 4.2.3.9 allows to consider as hard errors * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, * but it is obsoleted by pmtu discovery). * * Note, that in modern internet, where routing is unreliable * and in each dark corner broken firewalls sit, sending random * errors ordered by their masters even this two messages finally lose * their original sense (even Linux sends invalid PORT_UNREACHs) * * Now we are in compliance with RFCs. * --ANK (980905) */ inet = inet_sk(sk); if (!sock_owned_by_user(sk) && inet->recverr) { sk->sk_err = err; sk->sk_error_report(sk); } else /* Only an error on timeout */ sk->sk_err_soft = err; out: bh_unlock_sock(sk); sock_put(sk); }
static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && !skb->sk && !ip_is_fragment(iph)) { const struct net_protocol *ipprot; int protocol = iph->protocol; ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && ipprot->early_demux) { ipprot->early_demux(skb); /* must reload iph, skb->head might have changed */ iph = ip_hdr(skb); } } /* * Initialise the virtual path cache for the packet. It describes * how the packet travels inside Linux networking. */ if (!skb_valid_dst(skb)) { int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, iph->tos, skb->dev); if (unlikely(err)) { if (err == -EXDEV) NET_INC_STATS_BH(net, LINUX_MIB_IPRPFILTER); goto drop; } } #ifdef CONFIG_IP_ROUTE_CLASSID if (unlikely(skb_dst(skb)->tclassid)) { struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); u32 idx = skb_dst(skb)->tclassid; st[idx&0xFF].o_packets++; st[idx&0xFF].o_bytes += skb->len; st[(idx>>16)&0xFF].i_packets++; st[(idx>>16)&0xFF].i_bytes += skb->len; } #endif if (iph->ihl > 5 && ip_rcv_options(skb)) goto drop; rt = skb_rtable(skb); if (rt->rt_type == RTN_MULTICAST) { IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len); } else if (rt->rt_type == RTN_BROADCAST) { IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len); } else if (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) { struct in_device *in_dev = __in_dev_get_rcu(skb->dev); /* RFC 1122 3.3.6: * * When a host sends a datagram to a link-layer broadcast * address, the IP destination address MUST be a legal IP * broadcast or IP multicast address. * * A host SHOULD silently discard a datagram that is received * via a link-layer broadcast (see Section 2.4) but does not * specify an IP multicast or broadcast destination address. * * This doesn't explicitly say L2 *broadcast*, but broadcast is * in a way a form of multicast and the most common use case for * this is 802.11 protecting against cross-station spoofing (the * so-called "hole-196" attack) so do it for both. */ if (in_dev && IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) goto drop; } return dst_input(skb); drop: kfree_skb(skb); return NET_RX_DROP; }