Beispiel #1
0
/*
 * This function is called to set a Fast Open socket's "fastopen_rsk" field
 * to NULL when a TFO socket no longer needs to access the request_sock.
 * This happens only after 3WHS has been either completed or aborted (e.g.,
 * RST is received).
 *
 * Before TFO, a child socket is created only after 3WHS is completed,
 * hence it never needs to access the request_sock. things get a lot more
 * complex with TFO. A child socket, accepted or not, has to access its
 * request_sock for 3WHS processing, e.g., to retransmit SYN-ACK pkts,
 * until 3WHS is either completed or aborted. Afterwards the req will stay
 * until either the child socket is accepted, or in the rare case when the
 * listener is closed before the child is accepted.
 *
 * In short, a request socket is only freed after BOTH 3WHS has completed
 * (or aborted) and the child socket has been accepted (or listener closed).
 * When a child socket is accepted, its corresponding req->sk is set to
 * NULL since it's no longer needed. More importantly, "req->sk == NULL"
 * will be used by the code below to determine if a child socket has been
 * accepted or not, and the check is protected by the fastopenq->lock
 * described below.
 *
 * Note that fastopen_rsk is only accessed from the child socket's context
 * with its socket lock held. But a request_sock (req) can be accessed by
 * both its child socket through fastopen_rsk, and a listener socket through
 * icsk_accept_queue.rskq_accept_head. To protect the access a simple spin
 * lock per listener "icsk->icsk_accept_queue.fastopenq->lock" is created.
 * only in the rare case when both the listener and the child locks are held,
 * e.g., in inet_csk_listen_stop() do we not need to acquire the lock.
 * The lock also protects other fields such as fastopenq->qlen, which is
 * decremented by this function when fastopen_rsk is no longer needed.
 *
 * Note that another solution was to simply use the existing socket lock
 * from the listener. But first socket lock is difficult to use. It is not
 * a simple spin lock - one must consider sock_owned_by_user() and arrange
 * to use sk_add_backlog() stuff. But what really makes it infeasible is the
 * locking hierarchy violation. E.g., inet_csk_listen_stop() may try to
 * acquire a child's lock while holding listener's socket lock. A corner
 * case might also exist in tcp_v4_hnd_req() that will trigger this locking
 * order.
 *
 * When a TFO req is created, it needs to sock_hold its listener to prevent
 * the latter data structure from going away.
 *
 * This function also sets "treq->listener" to NULL and unreference listener
 * socket. treq->listener is used by the listener so it is protected by the
 * fastopenq->lock in this function.
 */
void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
			   bool reset)
{
	struct sock *lsk = tcp_rsk(req)->listener;
	struct fastopen_queue *fastopenq =
	    inet_csk(lsk)->icsk_accept_queue.fastopenq;

	tcp_sk(sk)->fastopen_rsk = NULL;
	spin_lock_bh(&fastopenq->lock);
	fastopenq->qlen--;
	tcp_rsk(req)->listener = NULL;
	if (req->sk)	/* the child socket hasn't been accepted yet */
		goto out;

	if (!reset || lsk->sk_state != TCP_LISTEN) {
		/* If the listener has been closed don't bother with the
		 * special RST handling below.
		 */
		spin_unlock_bh(&fastopenq->lock);
		sock_put(lsk);
		reqsk_free(req);
		return;
	}
	/* Wait for 60secs before removing a req that has triggered RST.
	 * This is a simple defense against TFO spoofing attack - by
	 * counting the req against fastopen.max_qlen, and disabling
	 * TFO when the qlen exceeds max_qlen.
	 *
	 * For more details see CoNext'11 "TCP Fast Open" paper.
	 */
	req->expires = jiffies + 60*HZ;
	if (fastopenq->rskq_rst_head == NULL)
		fastopenq->rskq_rst_head = req;
	else
		fastopenq->rskq_rst_tail->dl_next = req;

	req->dl_next = NULL;
	fastopenq->rskq_rst_tail = req;
	fastopenq->qlen++;
out:
	spin_unlock_bh(&fastopenq->lock);
	sock_put(lsk);
	return;
}
Beispiel #2
0
static struct request_sock *rst_reqsk_alloc(unsigned short family)
{
	struct request_sock *req;

	if (family == AF_INET)
		req = reqsk_alloc(&tcp_request_sock_ops);
	else
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
		req = reqsk_alloc(&tcp6_request_sock_ops);
#else
		return ERR_PTR(-EINVAL);
#endif
#ifdef CONFIG_TCP_MD5SIG
	if (req) {
		if (family == AF_INET)
			tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
		else
			tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
	}
#endif
	return req;
}
Beispiel #3
0
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr*)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;
	struct net *net = dev_net(skb->dev);

	sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
			th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
				   ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi6 fl6;

			memset(&fl6, 0, sizeof(fl6));
			fl6.flowi6_proto = IPPROTO_TCP;
			fl6.daddr = np->daddr;
			fl6.saddr = np->saddr;
			fl6.flowi6_oif = sk->sk_bound_dev_if;
			fl6.flowi6_mark = sk->sk_mark;
			fl6.fl6_dport = inet->inet_dport;
			fl6.fl6_sport = inet->inet_sport;
			fl6.flowi6_uid = sock_i_uid(sk);
			security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

			dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
			if (IS_ERR(dst)) {
				sk->sk_err_soft = -PTR_ERR(dst);
				goto out;
			}

		} else
			dst_hold(dst);

		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} 
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
					   &hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		WARN_ON(req->sk != NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);		

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
Beispiel #4
0
int rst_restore_synwait_queue(struct sock *sk, struct cpt_sock_image *si,
			      loff_t pos, struct cpt_context *ctx)
{
	int err;
	loff_t end = pos + si->cpt_next;

	pos += si->cpt_hdrlen;

	lock_sock(sk);
	while (pos < end) {
		struct cpt_openreq_image oi;

		err = rst_sock_attr(&pos, sk, ctx);
		if (!err)
			continue;
		if (err < 0)
			goto out;

		err = rst_get_object(CPT_OBJ_OPENREQ, pos, &oi, ctx);
		if (err)
			goto out;

		if (oi.cpt_object == CPT_OBJ_OPENREQ) {
			struct request_sock *req;

			if (oi.cpt_family == AF_INET6 &&
			    sk->sk_family != AF_INET6)
				/* related to non initialized cpt_family bug */
				goto next;
			req = rst_reqsk_alloc(oi.cpt_family);
			if (IS_ERR(req)) {
				release_sock(sk);
				return PTR_ERR(req);
			}

			if (req == NULL) {
				release_sock(sk);
				return -ENOMEM;
			}

			tcp_rsk(req)->rcv_isn = oi.cpt_rcv_isn;
			tcp_rsk(req)->snt_isn = oi.cpt_snt_isn;
			inet_rsk(req)->rmt_port = oi.cpt_rmt_port;
			req->mss = oi.cpt_mss;
			req->retrans = oi.cpt_retrans;
			inet_rsk(req)->snd_wscale = oi.cpt_snd_wscale;
			inet_rsk(req)->rcv_wscale = oi.cpt_rcv_wscale;
			inet_rsk(req)->tstamp_ok = oi.cpt_tstamp_ok;
			inet_rsk(req)->sack_ok = oi.cpt_sack_ok;
			inet_rsk(req)->wscale_ok = oi.cpt_wscale_ok;
			inet_rsk(req)->ecn_ok = oi.cpt_ecn_ok;
			inet_rsk(req)->acked = oi.cpt_acked;
			inet_rsk(req)->opt = NULL;
			req->window_clamp = oi.cpt_window_clamp;
			req->rcv_wnd = oi.cpt_rcv_wnd;
			req->ts_recent = oi.cpt_ts_recent;
			req->expires = jiffies_import(oi.cpt_expires);
			req->sk = NULL;
			req->secid = 0;
			req->peer_secid = 0;

			if (oi.cpt_family == AF_INET6) {
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
				inet6_rsk(req)->pktopts = NULL;
				memcpy(&inet6_rsk(req)->loc_addr, oi.cpt_loc_addr, 16);
				memcpy(&inet6_rsk(req)->rmt_addr, oi.cpt_rmt_addr, 16);
				inet6_rsk(req)->iif = oi.cpt_iif;
				inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
#endif
			} else {
				memcpy(&inet_rsk(req)->loc_addr, oi.cpt_loc_addr, 4);
				memcpy(&inet_rsk(req)->rmt_addr, oi.cpt_rmt_addr, 4);
				inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
			}
		}
next:
		pos += oi.cpt_next;
	}
	err = 0;
out:
	release_sock(sk);
	return err;
}
Beispiel #5
0
static bool tcp_fastopen_create_child(struct sock *sk,
                                      struct sk_buff *skb,
                                      struct dst_entry *dst,
                                      struct request_sock *req)
{
    struct tcp_sock *tp;
    struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
    struct sock *child;
    u32 end_seq;

    req->num_retrans = 0;
    req->num_timeout = 0;
    req->sk = NULL;

    child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
    if (!child)
        return false;

    spin_lock(&queue->fastopenq->lock);
    queue->fastopenq->qlen++;
    spin_unlock(&queue->fastopenq->lock);

    /* Initialize the child socket. Have to fix some values to take
     * into account the child is a Fast Open socket and is created
     * only out of the bits carried in the SYN packet.
     */
    tp = tcp_sk(child);

    tp->fastopen_rsk = req;
    tcp_rsk(req)->tfo_listener = true;

    /* RFC1323: The window in SYN & SYN/ACK segments is never
     * scaled. So correct it appropriately.
     */
    tp->snd_wnd = ntohs(tcp_hdr(skb)->window);

    /* Activate the retrans timer so that SYNACK can be retransmitted.
     * The request socket is not added to the SYN table of the parent
     * because it's been added to the accept queue directly.
     */
    inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
                              TCP_TIMEOUT_INIT, TCP_RTO_MAX);

    atomic_set(&req->rsk_refcnt, 1);
    /* Add the child socket directly into the accept queue */
    inet_csk_reqsk_queue_add(sk, req, child);

    /* Now finish processing the fastopen child socket. */
    inet_csk(child)->icsk_af_ops->rebuild_header(child);
    tcp_init_congestion_control(child);
    tcp_mtup_init(child);
    tcp_init_metrics(child);
    tcp_init_buffer_space(child);

    /* Queue the data carried in the SYN packet. We need to first
     * bump skb's refcnt because the caller will attempt to free it.
     * Note that IPv6 might also have used skb_get() trick
     * in tcp_v6_conn_request() to keep this SYN around (treq->pktopts)
     * So we need to eventually get a clone of the packet,
     * before inserting it in sk_receive_queue.
     *
     * XXX (TFO) - we honor a zero-payload TFO request for now,
     * (any reason not to?) but no need to queue the skb since
     * there is no data. How about SYN+FIN?
     */
    end_seq = TCP_SKB_CB(skb)->end_seq;
    if (end_seq != TCP_SKB_CB(skb)->seq + 1) {
        struct sk_buff *skb2;

        if (unlikely(skb_shared(skb)))
            skb2 = skb_clone(skb, GFP_ATOMIC);
        else
            skb2 = skb_get(skb);

        if (likely(skb2)) {
            skb_dst_drop(skb2);
            __skb_pull(skb2, tcp_hdrlen(skb));
            skb_set_owner_r(skb2, child);
            __skb_queue_tail(&child->sk_receive_queue, skb2);
            tp->syn_data_acked = 1;

            /* u64_stats_update_begin(&tp->syncp) not needed here,
             * as we certainly are not changing upper 32bit value (0)
             */
            tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1;
        } else {
            end_seq = TCP_SKB_CB(skb)->seq + 1;
        }
    }
    tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = end_seq;
    sk->sk_data_ready(sk);
    bh_unlock_sock(child);
    sock_put(child);
    WARN_ON(!req->sk);
    return true;
}
Beispiel #6
0
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp; 
	__u32 seq;

	sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
			  th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq); 
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}

		} else
			dst_hold(dst);

		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for an request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
					   &hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, it SYNs are crossed. --ANK */ 
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
Beispiel #7
0
void tcp_v4_err(struct sk_buff *skb, u32 info)
{
	struct iphdr *iph = (struct iphdr *)skb->data;
	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
	struct tcp_sock *tp;
	struct inet_sock *inet;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	struct sock *sk;
	__u32 seq;
	int err;

	if (skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
			 th->source, inet_iif(skb));
	if (!sk) {
		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		BUG_TRAP(!req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can f.e. if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even this two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 *							--ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
Beispiel #8
0
static struct sock *tcp_fastopen_create_child(struct sock *sk,
					      struct sk_buff *skb,
					      struct dst_entry *dst,
					      struct request_sock *req)
{
	struct tcp_sock *tp;
	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
	struct sock *child;
	bool own_req;

	req->num_retrans = 0;
	req->num_timeout = 0;
	req->sk = NULL;

	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
							 NULL, &own_req);
	if (!child)
		return NULL;

	spin_lock(&queue->fastopenq.lock);
	queue->fastopenq.qlen++;
	spin_unlock(&queue->fastopenq.lock);

	/* Initialize the child socket. Have to fix some values to take
	 * into account the child is a Fast Open socket and is created
	 * only out of the bits carried in the SYN packet.
	 */
	tp = tcp_sk(child);

	tp->fastopen_rsk = req;
	tcp_rsk(req)->tfo_listener = true;

	/* RFC1323: The window in SYN & SYN/ACK segments is never
	 * scaled. So correct it appropriately.
	 */
	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
	tp->max_window = tp->snd_wnd;

	/* Activate the retrans timer so that SYNACK can be retransmitted.
	 * The request socket is not added to the ehash
	 * because it's been added to the accept queue directly.
	 */
	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);

	atomic_set(&req->rsk_refcnt, 2);

	/* Now finish processing the fastopen child socket. */
	inet_csk(child)->icsk_af_ops->rebuild_header(child);
	tcp_init_congestion_control(child);
	tcp_mtup_init(child);
	tcp_init_metrics(child);
	tcp_init_buffer_space(child);

	tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;

	tcp_fastopen_add_skb(child, skb);

	tcp_rsk(req)->rcv_nxt = tp->rcv_nxt;
	tp->rcv_wup = tp->rcv_nxt;
	/* tcp_conn_request() is sending the SYNACK,
	 * and queues the child into listener accept queue.
	 */
	return child;
}
Beispiel #9
0
static bool tcp_fastopen_create_child(struct sock *sk,
				      struct sk_buff *skb,
				      struct dst_entry *dst,
				      struct request_sock *req)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
	struct sock *child;

	req->num_retrans = 0;
	req->num_timeout = 0;
	req->sk = NULL;

	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
	if (child == NULL)
		return false;

	spin_lock(&queue->fastopenq->lock);
	queue->fastopenq->qlen++;
	spin_unlock(&queue->fastopenq->lock);

	/* Initialize the child socket. Have to fix some values to take
	 * into account the child is a Fast Open socket and is created
	 * only out of the bits carried in the SYN packet.
	 */
	tp = tcp_sk(child);

	tp->fastopen_rsk = req;
	/* Do a hold on the listner sk so that if the listener is being
	 * closed, the child that has been accepted can live on and still
	 * access listen_lock.
	 */
	sock_hold(sk);
	tcp_rsk(req)->listener = sk;

	/* RFC1323: The window in SYN & SYN/ACK segments is never
	 * scaled. So correct it appropriately.
	 */
	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);

	/* Activate the retrans timer so that SYNACK can be retransmitted.
	 * The request socket is not added to the SYN table of the parent
	 * because it's been added to the accept queue directly.
	 */
	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);

	/* Add the child socket directly into the accept queue */
	inet_csk_reqsk_queue_add(sk, req, child);

	/* Now finish processing the fastopen child socket. */
	inet_csk(child)->icsk_af_ops->rebuild_header(child);
	tcp_init_congestion_control(child);
	tcp_mtup_init(child);
	tcp_init_metrics(child);
	tcp_init_buffer_space(child);

	/* Queue the data carried in the SYN packet. We need to first
	 * bump skb's refcnt because the caller will attempt to free it.
	 *
	 * XXX (TFO) - we honor a zero-payload TFO request for now,
	 * (any reason not to?) but no need to queue the skb since
	 * there is no data. How about SYN+FIN?
	 */
	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1) {
		skb = skb_get(skb);
		skb_dst_drop(skb);
		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
		skb_set_owner_r(skb, child);
		__skb_queue_tail(&child->sk_receive_queue, skb);
		tp->syn_data_acked = 1;
	}
	tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
	sk->sk_data_ready(sk, 0);
	bh_unlock_sock(child);
	sock_put(child);
	WARN_ON(req->sk == NULL);
	return true;
}