static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); struct dccp_sock *dp; struct ipv6_pinfo *np; struct sock *sk; int err; __u64 seq; struct net *net = dev_net(skb->dev); if (skb->len < offset + sizeof(*dh) || skb->len < offset + __dccp_basic_hdr_len(dh)) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; } sk = inet6_lookup(net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); if (sk == NULL) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; } if (sk->sk_state == DCCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); return; } bh_lock_sock(sk); if (sock_owned_by_user(sk)) NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; dp = dccp_sk(sk); seq = dccp_hdr_seq(dh); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_awl, dp->dccps_awh)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } np = inet6_sk(sk); if (type == NDISC_REDIRECT) { struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); if (dst) dst->ops->redirect(dst, sk, skb); goto out; } if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst = NULL; if (!ip6_sk_accept_pmtu(sk)) goto out; if (sock_owned_by_user(sk)) goto out; if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED)) goto out; dst = inet6_csk_update_pmtu(sk, ntohl(info)); if (!dst) goto out; if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) dccp_sync_mss(sk, dst_mtu(dst)); goto out; } icmpv6_err_convert(type, code, &err); /* Might be for an request_sock */ switch (sk->sk_state) { struct request_sock *req, **prev; case DCCP_LISTEN: if (sock_owned_by_user(sk)) goto out; req = inet6_csk_search_req(sk, &prev, dh->dccph_dport, &hdr->daddr, &hdr->saddr, inet6_iif(skb)); if (req == NULL) goto out; /* * ICMPs are not backlogged, hence we cannot get an established * socket here. */ WARN_ON(req->sk != NULL); if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case DCCP_REQUESTING: case DCCP_RESPOND: /* Cannot happen. It can, it SYNs are crossed. --ANK */ if (!sock_owned_by_user(sk)) { DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); sk->sk_err = err; /* * Wake people up to see the error * (see connect in sock.c) */ sk->sk_error_report(sk); dccp_done(sk); } else sk->sk_err_soft = err; goto out; } if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk->sk_error_report(sk); } else sk->sk_err_soft = err; out: bh_unlock_sock(sk); sock_put(sk); }
static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr; struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct in6_addr *saddr = NULL, *final_p = NULL, final; struct flowi fl; struct dst_entry *dst; int addr_type; int err; dp->dccps_role = DCCP_ROLE_CLIENT; if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; memset(&fl, 0, sizeof(fl)); if (np->sndflow) { fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; IP6_ECN_flow_init(fl.fl6_flowlabel); if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); if (flowlabel == NULL) return -EINVAL; ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); fl6_sock_release(flowlabel); } } /* * connect() to INADDR_ANY means loopback (BSD'ism). */ if (ipv6_addr_any(&usin->sin6_addr)) usin->sin6_addr.s6_addr[15] = 1; addr_type = ipv6_addr_type(&usin->sin6_addr); if (addr_type & IPV6_ADDR_MULTICAST) return -ENETUNREACH; if (addr_type & IPV6_ADDR_LINKLOCAL) { if (addr_len >= sizeof(struct sockaddr_in6) && usin->sin6_scope_id) { /* If interface is set while binding, indices * must coincide. */ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != usin->sin6_scope_id) return -EINVAL; sk->sk_bound_dev_if = usin->sin6_scope_id; } /* Connect to link-local address requires an interface */ if (!sk->sk_bound_dev_if) return -EINVAL; } ipv6_addr_copy(&np->daddr, &usin->sin6_addr); np->flow_label = fl.fl6_flowlabel; /* * DCCP over IPv4 */ if (addr_type == IPV6_ADDR_MAPPED) { u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); if (__ipv6_only_sock(sk)) return -ENETUNREACH; sin.sin_family = AF_INET; sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; icsk->icsk_af_ops = &dccp_ipv6_mapped; sk->sk_backlog_rcv = dccp_v4_do_rcv; err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); if (err) { icsk->icsk_ext_hdr_len = exthdrlen; icsk->icsk_af_ops = &dccp_ipv6_af_ops; sk->sk_backlog_rcv = dccp_v6_do_rcv; goto failure; } else { ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF), inet->saddr); ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF), inet->rcv_saddr); } return err; } if (!ipv6_addr_any(&np->rcv_saddr)) saddr = &np->rcv_saddr; fl.proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr); fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = usin->sin6_port; fl.fl_ip_sport = inet->sport; security_sk_classify_flow(sk, &fl); if (np->opt != NULL && np->opt->srcrt != NULL) { const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; ipv6_addr_copy(&final, &fl.fl6_dst); ipv6_addr_copy(&fl.fl6_dst, rt0->addr); final_p = &final; } err = ip6_dst_lookup(sk, &dst, &fl); if (err) goto failure; if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) goto failure; } if (saddr == NULL) { saddr = &fl.fl6_src; ipv6_addr_copy(&np->rcv_saddr, saddr); } /* set the source address */ ipv6_addr_copy(&np->saddr, saddr); inet->rcv_saddr = LOOPBACK4_IPV6; __ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; if (np->opt != NULL) icsk->icsk_ext_hdr_len = (np->opt->opt_flen + np->opt->opt_nflen); inet->dport = usin->sin6_port; dccp_set_state(sk, DCCP_REQUESTING); err = inet6_hash_connect(&dccp_death_row, sk); if (err) goto late_failure; dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32, np->daddr.s6_addr32, inet->sport, inet->dport); err = dccp_connect(sk); if (err) goto late_failure; return 0; late_failure: dccp_set_state(sk, DCCP_CLOSED); __sk_dst_reset(sk); failure: inet->dport = 0; sk->sk_route_caps = 0; return err; }
void dccp_close(struct sock *sk, long timeout) { struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; int state; lock_sock(sk); sk->sk_shutdown = SHUTDOWN_MASK; if (sk->sk_state == DCCP_LISTEN) { dccp_set_state(sk, DCCP_CLOSED); /* Special case. */ inet_csk_listen_stop(sk); goto adjudge_to_death; } sk_stop_timer(sk, &dp->dccps_xmit_timer); /* * We need to flush the recv. buffs. We do this only on the * descriptor close, not protocol-sourced closes, because the *reader process may not have drained the data yet! */ /* FIXME: check for unread data */ while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { __kfree_skb(skb); } if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); } else if (dccp_close_state(sk)) { dccp_send_close(sk, 1); } sk_stream_wait_close(sk, timeout); adjudge_to_death: state = sk->sk_state; sock_hold(sk); sock_orphan(sk); atomic_inc(sk->sk_prot->orphan_count); /* * It is the last release_sock in its life. It will remove backlog. */ release_sock(sk); /* * Now socket is owned by kernel and we acquire BH lock * to finish close. No need to check for user refs. */ local_bh_disable(); bh_lock_sock(sk); BUG_TRAP(!sock_owned_by_user(sk)); /* Have we already been destroyed by a softirq or backlog? */ if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED) goto out; /* * The last release_sock may have processed the CLOSE or RESET * packet moving sock to CLOSED state, if not we have to fire * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination" * in draft-ietf-dccp-spec-11. -acme */ if (sk->sk_state == DCCP_CLOSING) { /* FIXME: should start at 2 * RTT */ /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, DCCP_RTO_MAX); #if 0 /* Yeah, we should use sk->sk_prot->orphan_count, etc */ dccp_set_state(sk, DCCP_CLOSED); #endif } if (sk->sk_state == DCCP_CLOSED) inet_csk_destroy_sock(sk); /* Otherwise, socket is reprieved until protocol close. */ out: bh_unlock_sock(sk); local_bh_enable(); sock_put(sk); }
static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct dccp_hdr *dh; struct dccp_sock *dp; struct ipv6_pinfo *np; struct sock *sk; int err; __u64 seq; struct net *net = dev_net(skb->dev); /* Only need dccph_dport & dccph_sport which are the first * 4 bytes in dccp header. * Our caller (icmpv6_notify()) already pulled 8 bytes for us. */ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); dh = (struct dccp_hdr *)(skb->data + offset); sk = __inet6_lookup_established(net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, &hdr->saddr, ntohs(dh->dccph_sport), inet6_iif(skb), 0); if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return -ENOENT; } if (sk->sk_state == DCCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); return 0; } seq = dccp_hdr_seq(dh); if (sk->sk_state == DCCP_NEW_SYN_RECV) { dccp_req_err(sk, seq); return 0; } bh_lock_sock(sk); if (sock_owned_by_user(sk)) __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; dp = dccp_sk(sk); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_awl, dp->dccps_awh)) { __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } np = inet6_sk(sk); if (type == NDISC_REDIRECT) { if (!sock_owned_by_user(sk)) { struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); if (dst) dst->ops->redirect(dst, sk, skb); } goto out; } if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst = NULL; if (!ip6_sk_accept_pmtu(sk)) goto out; if (sock_owned_by_user(sk)) goto out; if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED)) goto out; dst = inet6_csk_update_pmtu(sk, ntohl(info)); if (!dst) goto out; if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) dccp_sync_mss(sk, dst_mtu(dst)); goto out; } icmpv6_err_convert(type, code, &err); /* Might be for an request_sock */ switch (sk->sk_state) { case DCCP_REQUESTING: case DCCP_RESPOND: /* Cannot happen. It can, it SYNs are crossed. --ANK */ if (!sock_owned_by_user(sk)) { __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); sk->sk_err = err; /* * Wake people up to see the error * (see connect in sock.c) */ sk->sk_error_report(sk); dccp_done(sk); } else sk->sk_err_soft = err; goto out; } if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk->sk_error_report(sk); } else sk->sk_err_soft = err; out: bh_unlock_sock(sk); sock_put(sk); return 0; }
static struct sock *dccp_v6_request_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { struct inet6_request_sock *ireq6 = inet6_rsk(req); struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct inet_sock *newinet; struct dccp_sock *newdp; struct dccp6_sock *newdp6; struct sock *newsk; struct ipv6_txoptions *opt; if (skb->protocol == htons(ETH_P_IP)) { /* * v6 mapped */ newsk = dccp_v4_request_recv_sock(sk, skb, req, dst); if (newsk == NULL) return NULL; newdp6 = (struct dccp6_sock *)newsk; newdp = dccp_sk(newsk); newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF), newinet->daddr); ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF), newinet->saddr); ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped; newsk->sk_backlog_rcv = dccp_v4_do_rcv; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count * here, dccp_create_openreq_child now does this for us, see the comment in * that function for the gory details. -acme */ /* It is tricky place. Until this moment IPv4 tcp worked with IPv6 icsk.icsk_af_ops. Sync it now. */ dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); return newsk; } opt = np->opt; if (sk_acceptq_is_full(sk)) goto out_overflow; if (dst == NULL) { struct in6_addr *final_p = NULL, final; struct flowi fl; memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); if (opt != NULL && opt->srcrt != NULL) { const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt; ipv6_addr_copy(&final, &fl.fl6_dst); ipv6_addr_copy(&fl.fl6_dst, rt0->addr); final_p = &final; } ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_rsk(req)->loc_port; security_sk_classify_flow(sk, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out; if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto out; } newsk = dccp_create_openreq_child(sk, req, skb); if (newsk == NULL) goto out; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks * count here, dccp_create_openreq_child now does this for us, see the * comment in that function for the gory details. -acme */ __ip6_dst_store(newsk, dst, NULL, NULL); newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newdp6 = (struct dccp6_sock *)newsk; newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; newdp = dccp_sk(newsk); newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); ipv6_addr_copy(&newnp->daddr, &ireq6->rmt_addr); ipv6_addr_copy(&newnp->saddr, &ireq6->loc_addr); ipv6_addr_copy(&newnp->rcv_saddr, &ireq6->loc_addr); newsk->sk_bound_dev_if = ireq6->iif; /* Now IPv6 options... First: no IPv4 options. */ newinet->inet_opt = NULL; /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; if (ireq6->pktopts != NULL) { newnp->pktoptions = skb_clone(ireq6->pktopts, GFP_ATOMIC); kfree_skb(ireq6->pktopts); ireq6->pktopts = NULL; if (newnp->pktoptions) skb_set_owner_r(newnp->pktoptions, newsk); } newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; /* * Clone native IPv6 options from listening socket (if any) * * Yes, keeping reference count would be much more clever, but we make * one more one thing there: reattach optmem to newsk. */ if (opt != NULL) { newnp->opt = ipv6_dup_options(newsk, opt); if (opt != np->opt) sock_kfree_s(sk, opt, opt->tot_len); } inet_csk(newsk)->icsk_ext_hdr_len = 0; if (newnp->opt != NULL) inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + newnp->opt->opt_flen); dccp_sync_mss(newsk, dst_mtu(dst)); newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; __inet6_hash(newsk); __inet_inherit_port(sk, newsk); return newsk; out_overflow: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); out: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); if (opt != NULL && opt != np->opt) sock_kfree_s(sk, opt, opt->tot_len); dst_release(dst); return NULL; }
int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct dccp_hdr *dh, unsigned len) { struct dccp_sock *dp = dccp_sk(sk); struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const int old_state = sk->sk_state; int queued = 0; /* * Step 3: Process LISTEN state * * If S.state == LISTEN, * If P.type == Request or P contains a valid Init Cookie option, * (* Must scan the packet's options to check for Init * Cookies. Only Init Cookies are processed here, * however; other options are processed in Step 8. This * scan need only be performed if the endpoint uses Init * Cookies *) * (* Generate a new socket and switch to that socket *) * Set S := new socket for this port pair * S.state = RESPOND * Choose S.ISS (initial seqno) or set from Init Cookies * Initialize S.GAR := S.ISS * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init * Cookies Continue with S.state == RESPOND * (* A Response packet will be generated in Step 11 *) * Otherwise, * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ if (sk->sk_state == DCCP_LISTEN) { if (dh->dccph_type == DCCP_PKT_REQUEST) { if (inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) < 0) return 1; /* FIXME: do congestion control initialization */ goto discard; } if (dh->dccph_type == DCCP_PKT_RESET) goto discard; /* Caller (dccp_v4_do_rcv) will send Reset */ dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; return 1; } if (sk->sk_state != DCCP_REQUESTING) { if (dccp_check_seqno(sk, skb)) goto discard; /* * Step 8: Process options and mark acknowledgeable */ if (dccp_parse_options(sk, skb)) goto discard; if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); if (dccp_msk(sk)->dccpms_send_ack_vector && dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKVEC_STATE_RECEIVED)) goto discard; /* XXX see the comments in dccp_rcv_established about this */ if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER) ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); else ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); } /* * Step 9: Process Reset * If P.type == Reset, * Tear down connection * S.state := TIMEWAIT * Set TIMEWAIT timer * Drop packet and return */ if (dh->dccph_type == DCCP_PKT_RESET) { /* * Queue the equivalent of TCP fin so that dccp_recvmsg * exits the loop */ dccp_fin(sk, skb); dccp_time_wait(sk, DCCP_TIME_WAIT, 0); return 0; /* * Step 7: Check for unexpected packet types * If (S.is_server and P.type == CloseReq) * or (S.is_server and P.type == Response) * or (S.is_client and P.type == Request) * or (S.state == RESPOND and P.type == Data), * Send Sync packet acknowledging P.seqno * Drop packet and return */ } else if ((dp->dccps_role != DCCP_ROLE_CLIENT && (dh->dccph_type == DCCP_PKT_RESPONSE || dh->dccph_type == DCCP_PKT_CLOSEREQ)) || (dp->dccps_role == DCCP_ROLE_CLIENT && dh->dccph_type == DCCP_PKT_REQUEST) || (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); goto discard; } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { dccp_rcv_closereq(sk, skb); goto discard; } else if (dh->dccph_type == DCCP_PKT_CLOSE) { dccp_rcv_close(sk, skb); return 0; } if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK); goto discard; } switch (sk->sk_state) { case DCCP_CLOSED: dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; return 1; case DCCP_REQUESTING: /* FIXME: do congestion control initialization */ queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len); if (queued >= 0) return queued; __kfree_skb(skb); return 0; case DCCP_RESPOND: case DCCP_PARTOPEN: queued = dccp_rcv_respond_partopen_state_process(sk, skb, dh, len); break; } if (dh->dccph_type == DCCP_PKT_ACK || dh->dccph_type == DCCP_PKT_DATAACK) { switch (old_state) { case DCCP_PARTOPEN: sk->sk_state_change(sk); sk_wake_async(sk, 0, POLL_OUT); break; } } if (!queued) { discard: __kfree_skb(skb); } return 0; }
int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct dccp_sock *dp = dccp_sk(sk); const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct rtable *rt; __be32 daddr, nexthop; int tmp; int err; dp->dccps_role = DCCP_ROLE_CLIENT; if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; if (usin->sin_family != AF_INET) return -EAFNOSUPPORT; nexthop = daddr = usin->sin_addr.s_addr; if (inet->opt != NULL && inet->opt->srr) { if (daddr == 0) return -EINVAL; nexthop = inet->opt->faddr; } tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_DCCP, inet->inet_sport, usin->sin_port, sk, 1); if (tmp < 0) return tmp; if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { ip_rt_put(rt); return -ENETUNREACH; } if (inet->opt == NULL || !inet->opt->srr) daddr = rt->rt_dst; if (inet->inet_saddr == 0) inet->inet_saddr = rt->rt_src; inet->inet_rcv_saddr = inet->inet_saddr; inet->inet_dport = usin->sin_port; inet->inet_daddr = daddr; inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet->opt != NULL) inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; /* * Socket identity is still unknown (sport may be zero). * However we set state to DCCP_REQUESTING and not releasing socket * lock select source port, enter ourselves into the hash tables and * complete initialization after this. */ dccp_set_state(sk, DCCP_REQUESTING); err = inet_hash_connect(&dccp_death_row, sk); if (err != 0) goto failure; err = ip_route_newports(&rt, IPPROTO_DCCP, inet->inet_sport, inet->inet_dport, sk); if (err != 0) goto failure; /* OK, now commit destination to socket. */ sk_setup_caps(sk, &rt->dst); dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr, inet->inet_daddr, inet->inet_sport, inet->inet_dport); inet->inet_id = dp->dccps_iss ^ jiffies; err = dccp_connect(sk); rt = NULL; if (err != 0) goto failure; out: return err; failure: /* * This unhashes the socket and releases the local port, if necessary. */ dccp_set_state(sk, DCCP_CLOSED); ip_rt_put(rt); sk->sk_route_caps = 0; inet->inet_dport = 0; goto out; }
static int threadfn(void *data) { struct socket *sock; struct sockaddr_in addr; int size,i; char buf[bufsize+1]; if (sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock) < 0) { err("sock_create_kern"); goto out; } sock->sk->sk_reuse = 1; memset(&addr, '\0', sizeof(addr)); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(INADDR_ANY); addr.sin_port = htons(9734); if (kernel_bind(sock, &addr, sizeof(addr)) < 0) { err("kernel_bind"); goto err; } if (kernel_listen(sock, 1024) < 0) { err("kernel_listen"); goto err; } while (!kthread_should_stop()) { struct socket *newsock; struct inet_connection_sock *icsk = inet_csk(sock->sk); struct socket_wq *wq; rcu_read_lock(); wq = rcu_dereference(sock->sk->sk_wq); wait_event_interruptible(wq->wait,!reqsk_queue_empty(&icsk->icsk_accept_queue) || kthread_should_stop()); if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) { if (kernel_accept(sock, &newsock, MSG_WAITALL) != 0) { err("kernel_accept"); goto err; } printk(KERN_INFO "Server : accept a new connection request.\n"); // transfer_data(newsock); memset(&buf, 0, bufsize+1); size = receive(newsock, buf, bufsize/2); printk(KERN_INFO "Server : received %d bytes\n", size); //data processing printk("Server : received data: %s\n", buf); size = receive(newsock, buf, bufsize/2); if (size < 0) { printk(KERN_INFO ": error getting datagram, sock_recvmsg error = %d\n", size); break; } else { printk(KERN_INFO "Server : received %d bytes\n", size); //data processing printk("Server : received data: %s\n", buf); for( i=0;i<bufsize/2;i++) { buf[i]+=1; } buf[i]=0 ; // sending //printk("buf=%s\n",buf); size=send(newsock, buf, bufsize/2+1); if(size<0) printk("Server : error send\n"); } //printk("ch =%c\n",ch); sock_release(newsock); } } err: sock_release(sock); out: return 0; }
/* * All SKB's seen here are completely headerless. It is our * job to build the DCCP header, and pass the packet down to * IP so it can do the same plus pass the packet off to the * device. */ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) { if (likely(skb != NULL)) { const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct dccp_sock *dp = dccp_sk(sk); struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); struct dccp_hdr *dh; /* XXX For now we're using only 48 bits sequence numbers */ const u32 dccp_header_size = sizeof(*dh) + sizeof(struct dccp_hdr_ext) + dccp_packet_hdr_len(dcb->dccpd_type); int err, set_ack = 1; u64 ackno = dp->dccps_gsr; dccp_inc_seqno(&dp->dccps_gss); switch (dcb->dccpd_type) { case DCCP_PKT_DATA: set_ack = 0; /* fall through */ case DCCP_PKT_DATAACK: break; case DCCP_PKT_REQUEST: set_ack = 0; /* fall through */ case DCCP_PKT_SYNC: case DCCP_PKT_SYNCACK: ackno = dcb->dccpd_seq; /* fall through */ default: /* * Only data packets should come through with skb->sk * set. */ WARN_ON(skb->sk); skb_set_owner_w(skb, sk); break; } dcb->dccpd_seq = dp->dccps_gss; if (dccp_insert_options(sk, skb)) { kfree_skb(skb); return -EPROTO; } /* Build DCCP header and checksum it. */ dh = dccp_zeroed_hdr(skb, dccp_header_size); dh->dccph_type = dcb->dccpd_type; dh->dccph_sport = inet->sport; dh->dccph_dport = inet->dport; dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4; dh->dccph_ccval = dcb->dccpd_ccval; dh->dccph_cscov = dp->dccps_pcslen; /* XXX For now we're using only 48 bits sequence numbers */ dh->dccph_x = 1; dp->dccps_awh = dp->dccps_gss; dccp_hdr_set_seq(dh, dp->dccps_gss); if (set_ack) dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno); switch (dcb->dccpd_type) { case DCCP_PKT_REQUEST: dccp_hdr_request(skb)->dccph_req_service = dp->dccps_service; break; case DCCP_PKT_RESET: dccp_hdr_reset(skb)->dccph_reset_code = dcb->dccpd_reset_code; break; } icsk->icsk_af_ops->send_check(sk, 0, skb); if (set_ack) dccp_event_ack_sent(sk); DCCP_INC_STATS(DCCP_MIB_OUTSEGS); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); err = icsk->icsk_af_ops->queue_xmit(skb, 0); return net_xmit_eval(err); } return -ENOBUFS; }
int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { const struct inet_sock *inet = inet_sk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; struct nlattr *attr; void *info = NULL; const struct inet_diag_handler *handler; int ext = req->idiag_ext; handler = inet_diag_table[req->sdiag_protocol]; BUG_ON(handler == NULL); nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; r = nlmsg_data(nlh); BUG_ON(sk->sk_state == TCP_TIME_WAIT); r->idiag_family = sk->sk_family; r->idiag_state = sk->sk_state; r->idiag_timer = 0; r->idiag_retrans = 0; r->id.idiag_if = sk->sk_bound_dev_if; sock_diag_save_cookie(sk, r->id.idiag_cookie); r->id.idiag_sport = inet->inet_sport; r->id.idiag_dport = inet->inet_dport; r->id.idiag_src[0] = inet->inet_rcv_saddr; r->id.idiag_dst[0] = inet->inet_daddr; if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown)) goto errout; /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, * hence this needs to be included regardless of socket family. */ if (ext & (1 << (INET_DIAG_TOS - 1))) if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0) goto errout; #if IS_ENABLED(CONFIG_IPV6) if (r->idiag_family == AF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); *(struct in6_addr *)r->id.idiag_src = np->rcv_saddr; *(struct in6_addr *)r->id.idiag_dst = np->daddr; if (ext & (1 << (INET_DIAG_TCLASS - 1))) if (nla_put_u8(skb, INET_DIAG_TCLASS, np->tclass) < 0) goto errout; } #endif r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->idiag_inode = sock_i_ino(sk); if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { struct inet_diag_meminfo minfo = { .idiag_rmem = sk_rmem_alloc_get(sk), .idiag_wmem = sk->sk_wmem_queued, .idiag_fmem = sk->sk_forward_alloc, .idiag_tmem = sk_wmem_alloc_get(sk), }; if (nla_put(skb, INET_DIAG_MEMINFO, sizeof(minfo), &minfo) < 0) goto errout; } if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO)) goto errout; if (icsk == NULL) { handler->idiag_get_info(sk, r, NULL); goto out; } #define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) if (icsk->icsk_pending == ICSK_TIME_RETRANS) { r->idiag_timer = 1; r->idiag_retrans = icsk->icsk_retransmits; r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { r->idiag_timer = 4; r->idiag_retrans = icsk->icsk_probes_out; r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); } else if (timer_pending(&sk->sk_timer)) { r->idiag_timer = 2; r->idiag_retrans = icsk->icsk_probes_out; r->idiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires); } else { r->idiag_timer = 0; r->idiag_expires = 0; } #undef EXPIRES_IN_MS if (ext & (1 << (INET_DIAG_INFO - 1))) { attr = nla_reserve(skb, INET_DIAG_INFO, sizeof(struct tcp_info)); if (!attr) goto errout; info = nla_data(attr); } if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) if (nla_put_string(skb, INET_DIAG_CONG, icsk->icsk_ca_ops->name) < 0) goto errout; handler->idiag_get_info(sk, r, info); if (sk->sk_state < TCP_TIME_WAIT && icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) icsk->icsk_ca_ops->get_info(sk, ext, skb); out: return nlmsg_end(skb, nlh); errout: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } EXPORT_SYMBOL_GPL(inet_sk_diag_fill); static int inet_csk_diag_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, user_ns, portid, seq, nlmsg_flags, unlh); } static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, struct sk_buff *skb, struct inet_diag_req_v2 *req, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { long tmo; struct inet_diag_msg *r; struct nlmsghdr *nlh; nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; r = nlmsg_data(nlh); BUG_ON(tw->tw_state != TCP_TIME_WAIT); tmo = tw->tw_ttd - jiffies; if (tmo < 0) tmo = 0; r->idiag_family = tw->tw_family; r->idiag_retrans = 0; r->id.idiag_if = tw->tw_bound_dev_if; sock_diag_save_cookie(tw, r->id.idiag_cookie); r->id.idiag_sport = tw->tw_sport; r->id.idiag_dport = tw->tw_dport; r->id.idiag_src[0] = tw->tw_rcv_saddr; r->id.idiag_dst[0] = tw->tw_daddr; r->idiag_state = tw->tw_substate; r->idiag_timer = 3; r->idiag_expires = DIV_ROUND_UP(tmo * 1000, HZ); r->idiag_rqueue = 0; r->idiag_wqueue = 0; r->idiag_uid = 0; r->idiag_inode = 0; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == AF_INET6) { const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); *(struct in6_addr *)r->id.idiag_src = tw6->tw_v6_rcv_saddr; *(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr; } #endif return nlmsg_end(skb, nlh); }
//start_listening void moon_listen(void) { //For test purposes moonraker_socket_t *listen; listen = kmalloc(sizeof(moonraker_socket_t),GFP_KERNEL); listen->proto = kmalloc(sizeof(moonraker_proto_t),GFP_KERNEL); listen->proto->name = kmalloc(5,GFP_KERNEL); listen->ip = 2130706433; listen->port = 80; listen->proto->defer_accept=0; listen->keepalive_timeout=0; listen->ack_pingpong=1; listen->max_backlog=2048; listen->defer_accept=1; strcpy(listen->proto->name,"http"); //end for test purpose struct sockaddr_in sin; struct socket *sock = NULL; struct sock *sk; struct tcp_sock *tp; struct inet_connection_sock *icsk; moonraker_proto_t *proto = listen->proto; u16 port = listen->port; u32 addr = listen->ip; //127.0.0.1 int err = 0; err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); if (err) { printk(KERN_ERR "Moonraker: error %d creating socket.\n", err); goto error; } sin.sin_family = AF_INET; sin.sin_addr.s_addr = htonl(addr); sin.sin_port = htons(port); sk = sock->sk; icsk = inet_csk(sk); sk->sk_reuse = 1; sock_set_flag(sk, SOCK_URGINLINE); err = sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin)); if (err){ printk(KERN_ERR "Moonraker: error %d binding socket. This means that probably some other process is (or was a short time ago) using addr %d\n",err,sin.sin_addr.s_addr); goto error; } tp = tcp_sk(sk); printk("listen sk accept_queue: %d.\n",!reqsk_queue_empty(&icsk->icsk_accept_queue)); icsk->icsk_ack.pingpong = listen->ack_pingpong; sock_reset_flag(sk, SOCK_LINGER); sk->sk_lingertime = 0; tp->linger2 = listen->keepalive_timeout * HZ; if (proto->defer_accept && !listen->keepalive_timeout && listen->defer_accept) icsk->icsk_accept_queue.rskq_defer_accept = 1; err = sock->ops->listen(sock, listen->max_backlog); if (err) { printk(KERN_ERR "Moonraker: error %d listening on socket.\n", err); goto error; } printk(KERN_NOTICE "Moonraker: thread %d listens on %s://%d.%d.%d.%d:%d.\n", 1, proto->name, HIPQUAD(addr), port); // return sock; return; error: if (sock) sock_release(sock); return; return NULL; }
static void ctcp_set_state(struct sock *sk, u8 new_state) { inet_csk(sk)->icsk_ca_state = TCP_CA_Open; }
static bool tcp_fastopen_create_child(struct sock *sk, struct sk_buff *skb, struct dst_entry *dst, struct request_sock *req) { struct tcp_sock *tp = tcp_sk(sk); struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; struct sock *child; req->num_retrans = 0; req->num_timeout = 0; req->sk = NULL; child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); if (child == NULL) return false; spin_lock(&queue->fastopenq->lock); queue->fastopenq->qlen++; spin_unlock(&queue->fastopenq->lock); /* Initialize the child socket. Have to fix some values to take * into account the child is a Fast Open socket and is created * only out of the bits carried in the SYN packet. */ tp = tcp_sk(child); tp->fastopen_rsk = req; /* Do a hold on the listner sk so that if the listener is being * closed, the child that has been accepted can live on and still * access listen_lock. */ sock_hold(sk); tcp_rsk(req)->listener = sk; /* RFC1323: The window in SYN & SYN/ACK segments is never * scaled. So correct it appropriately. */ tp->snd_wnd = ntohs(tcp_hdr(skb)->window); /* Activate the retrans timer so that SYNACK can be retransmitted. * The request socket is not added to the SYN table of the parent * because it's been added to the accept queue directly. */ inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, TCP_TIMEOUT_INIT, TCP_RTO_MAX); /* Add the child socket directly into the accept queue */ inet_csk_reqsk_queue_add(sk, req, child); /* Now finish processing the fastopen child socket. */ inet_csk(child)->icsk_af_ops->rebuild_header(child); tcp_init_congestion_control(child); tcp_mtup_init(child); tcp_init_metrics(child); tcp_init_buffer_space(child); /* Queue the data carried in the SYN packet. We need to first * bump skb's refcnt because the caller will attempt to free it. * * XXX (TFO) - we honor a zero-payload TFO request for now, * (any reason not to?) but no need to queue the skb since * there is no data. How about SYN+FIN? */ if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1) { skb = skb_get(skb); skb_dst_drop(skb); __skb_pull(skb, tcp_hdr(skb)->doff * 4); skb_set_owner_r(skb, child); __skb_queue_tail(&child->sk_receive_queue, skb); tp->syn_data_acked = 1; } tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; sk->sk_data_ready(sk, 0); bh_unlock_sock(child); sock_put(child); WARN_ON(req->sk == NULL); return true; }
/* * All SKB's seen here are completely headerless. It is our * job to build the DCCP header, and pass the packet down to * IP so it can do the same plus pass the packet off to the * device. */ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) { if (likely(skb != NULL)) { const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct dccp_sock *dp = dccp_sk(sk); struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); struct dccp_hdr *dh; /* XXX For now we're using only 48 bits sequence numbers */ const u32 dccp_header_size = sizeof(*dh) + sizeof(struct dccp_hdr_ext) + dccp_packet_hdr_len(dcb->dccpd_type); int err, set_ack = 1; u64 ackno = dp->dccps_gsr; dccp_inc_seqno(&dp->dccps_gss); switch (dcb->dccpd_type) { case DCCP_PKT_DATA: set_ack = 0; /* fall through */ case DCCP_PKT_DATAACK: break; case DCCP_PKT_REQUEST: set_ack = 0; /* fall through */ case DCCP_PKT_SYNC: case DCCP_PKT_SYNCACK: ackno = dcb->dccpd_seq; /* fall through */ default: /* * Only data packets should come through with skb->sk * set. */ WARN_ON(skb->sk); skb_set_owner_w(skb, sk); break; } dcb->dccpd_seq = dp->dccps_gss; if (dccp_insert_options(sk, skb)) { kfree_skb(skb); return -EPROTO; } skb->h.raw = skb_push(skb, dccp_header_size); dh = dccp_hdr(skb); /* Build DCCP header and checksum it. */ memset(dh, 0, dccp_header_size); dh->dccph_type = dcb->dccpd_type; dh->dccph_sport = inet->sport; dh->dccph_dport = inet->dport; dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4; dh->dccph_ccval = dcb->dccpd_ccval; /* XXX For now we're using only 48 bits sequence numbers */ dh->dccph_x = 1; dp->dccps_awh = dp->dccps_gss; dccp_hdr_set_seq(dh, dp->dccps_gss); if (set_ack) dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno); switch (dcb->dccpd_type) { case DCCP_PKT_REQUEST: dccp_hdr_request(skb)->dccph_req_service = dp->dccps_service; break; case DCCP_PKT_RESET: dccp_hdr_reset(skb)->dccph_reset_code = dcb->dccpd_reset_code; break; } icsk->icsk_af_ops->send_check(sk, skb->len, skb); if (set_ack) dccp_event_ack_sent(sk); DCCP_INC_STATS(DCCP_MIB_OUTSEGS); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); err = icsk->icsk_af_ops->queue_xmit(skb, 0); if (err <= 0) return err; /* NET_XMIT_CN is special. It does not guarantee, * that this packet is lost. It tells that device * is about to start to drop packets or already * drops some packets of the same priority and * invokes us to send less aggressively. */ return err == NET_XMIT_CN ? 0 : err; } return -ENOBUFS; }
int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct dccp_sock *dp = dccp_sk(sk); const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct rtable *rt; __be32 daddr, nexthop; int tmp; int err; dp->dccps_role = DCCP_ROLE_CLIENT; if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; if (usin->sin_family != AF_INET) return -EAFNOSUPPORT; nexthop = daddr = usin->sin_addr.s_addr; if (inet->opt != NULL && inet->opt->srr) { if (daddr == 0) return -EINVAL; nexthop = inet->opt->faddr; } tmp = ip_route_connect(&rt, nexthop, inet->saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_DCCP, inet->sport, usin->sin_port, sk, 1); if (tmp < 0) return tmp; if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { ip_rt_put(rt); return -ENETUNREACH; } if (inet->opt == NULL || !inet->opt->srr) daddr = rt->rt_dst; if (inet->saddr == 0) inet->saddr = rt->rt_src; inet->rcv_saddr = inet->saddr; inet->dport = usin->sin_port; inet->daddr = daddr; inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet->opt != NULL) inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; dccp_set_state(sk, DCCP_REQUESTING); err = inet_hash_connect(&dccp_death_row, sk); if (err != 0) goto failure; err = ip_route_newports(&rt, IPPROTO_DCCP, inet->sport, inet->dport, sk); if (err != 0) goto failure; sk_setup_caps(sk, &rt->u.dst); dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, inet->daddr, inet->sport, inet->dport); inet->id = dp->dccps_iss ^ jiffies; err = dccp_connect(sk); rt = NULL; if (err != 0) goto failure; out: return err; failure: dccp_set_state(sk, DCCP_CLOSED); ip_rt_put(rt); sk->sk_route_caps = 0; inet->dport = 0; goto out; }
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; struct rt6_info *rt; struct flowi6 fl6; struct dst_entry *dst; int addr_type; int err; if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; memset(&fl6, 0, sizeof(fl6)); if (np->sndflow) { fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; IP6_ECN_flow_init(fl6.flowlabel); if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; usin->sin6_addr = flowlabel->dst; fl6_sock_release(flowlabel); } } if(ipv6_addr_any(&usin->sin6_addr)) usin->sin6_addr.s6_addr[15] = 0x1; addr_type = ipv6_addr_type(&usin->sin6_addr); if(addr_type & IPV6_ADDR_MULTICAST) return -ENETUNREACH; if (addr_type&IPV6_ADDR_LINKLOCAL) { if (addr_len >= sizeof(struct sockaddr_in6) && usin->sin6_scope_id) { if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != usin->sin6_scope_id) return -EINVAL; sk->sk_bound_dev_if = usin->sin6_scope_id; } if (!sk->sk_bound_dev_if) return -EINVAL; } if (tp->rx_opt.ts_recent_stamp && !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) { tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; tp->write_seq = 0; } np->daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; if (addr_type == IPV6_ADDR_MAPPED) { u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); if (__ipv6_only_sock(sk)) return -ENETUNREACH; sin.sin_family = AF_INET; sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; icsk->icsk_af_ops = &ipv6_mapped; sk->sk_backlog_rcv = tcp_v4_do_rcv; #ifdef CONFIG_TCP_MD5SIG tp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); if (err) { icsk->icsk_ext_hdr_len = exthdrlen; icsk->icsk_af_ops = &ipv6_specific; sk->sk_backlog_rcv = tcp_v6_do_rcv; #ifdef CONFIG_TCP_MD5SIG tp->af_specific = &tcp_sock_ipv6_specific; #endif goto failure; } else { ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr); } return err; } if (!ipv6_addr_any(&np->rcv_saddr)) saddr = &np->rcv_saddr; fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = np->daddr; fl6.saddr = saddr ? *saddr : np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; final_p = fl6_update_dst(&fl6, np->opt, &final); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto failure; } if (saddr == NULL) { saddr = &fl6.saddr; np->rcv_saddr = *saddr; } np->saddr = *saddr; inet->inet_rcv_saddr = LOOPBACK4_IPV6; sk->sk_gso_type = SKB_GSO_TCPV6; __ip6_dst_store(sk, dst, NULL, NULL); rt = (struct rt6_info *) dst; if (tcp_death_row.sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) { struct inet_peer *peer = rt6_get_peer(rt); if (peer) { inet_peer_refcheck(peer); if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; tp->rx_opt.ts_recent = peer->tcp_ts; } } } icsk->icsk_ext_hdr_len = 0; if (np->opt) icsk->icsk_ext_hdr_len = (np->opt->opt_flen + np->opt->opt_nflen); tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); inet->inet_dport = usin->sin6_port; tcp_set_state(sk, TCP_SYN_SENT); err = inet6_hash_connect(&tcp_death_row, sk); if (err) goto late_failure; if (!tp->write_seq) tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, np->daddr.s6_addr32, inet->inet_sport, inet->inet_dport); err = tcp_connect(sk); if (err) goto late_failure; return 0; late_failure: tcp_set_state(sk, TCP_CLOSE); __sk_dst_reset(sk); failure: inet->inet_dport = 0; sk->sk_route_caps = 0; return err; }
static int dccp_rcv_request_sent_state_process(struct sock *sk, struct sk_buff *skb, const struct dccp_hdr *dh, const unsigned len) { /* * Step 4: Prepare sequence numbers in REQUEST * If S.state == REQUEST, * If (P.type == Response or P.type == Reset) * and S.AWL <= P.ackno <= S.AWH, * / * Set sequence number variables corresponding to the * other endpoint, so P will pass the tests in Step 6 * / * Set S.GSR, S.ISR, S.SWL, S.SWH * / * Response processing continues in Step 10; Reset * processing continues in Step 9 * / */ if (dh->dccph_type == DCCP_PKT_RESPONSE) { const struct inet_connection_sock *icsk = inet_csk(sk); struct dccp_sock *dp = dccp_sk(sk); /* Stop the REQUEST timer */ inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); BUG_TRAP(sk->sk_send_head != NULL); __kfree_skb(sk->sk_send_head); sk->sk_send_head = NULL; if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh)) { dccp_pr_debug("invalid ackno: S.AWL=%llu, " "P.ackno=%llu, S.AWH=%llu \n", (unsigned long long)dp->dccps_awl, (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq, (unsigned long long)dp->dccps_awh); goto out_invalid_packet; } if (dccp_parse_options(sk, skb)) goto out_invalid_packet; if (dccp_msk(sk)->dccpms_send_ack_vector && dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKVEC_STATE_RECEIVED)) goto out_invalid_packet; /* FIXME: change error code */ dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; dccp_update_gsr(sk, dp->dccps_isr); /* * SWL and AWL are initially adjusted so that they are not less than * the initial Sequence Numbers received and sent, respectively: * SWL := max(GSR + 1 - floor(W/4), ISR), * AWL := max(GSS - W' + 1, ISS). * These adjustments MUST be applied only at the beginning of the * connection. * * AWL was adjusted in dccp_v4_connect -acme */ dccp_set_seqno(&dp->dccps_swl, max48(dp->dccps_swl, dp->dccps_isr)); dccp_sync_mss(sk, icsk->icsk_pmtu_cookie); /* * Step 10: Process REQUEST state (second part) * If S.state == REQUEST, * / * If we get here, P is a valid Response from the * server (see Step 4), and we should move to * PARTOPEN state. PARTOPEN means send an Ack, * don't send Data packets, retransmit Acks * periodically, and always include any Init Cookie * from the Response * / * S.state := PARTOPEN * Set PARTOPEN timer * Continue with S.state == PARTOPEN * / * Step 12 will send the Ack completing the * three-way handshake * / */ dccp_set_state(sk, DCCP_PARTOPEN); /* Make sure socket is routed, for correct metrics. */ icsk->icsk_af_ops->rebuild_header(sk); if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); sk_wake_async(sk, 0, POLL_OUT); } if (sk->sk_write_pending || icsk->icsk_ack.pingpong || icsk->icsk_accept_queue.rskq_defer_accept) { /* Save one ACK. Data will be ready after * several ticks, if write_pending is set. * * It may be deleted, but with this feature tcpdumps * look so _wonderfully_ clever, that I was not able * to stand against the temptation 8) --ANK */ /* * OK, in DCCP we can as well do a similar trick, its * even in the draft, but there is no need for us to * schedule an ack here, as dccp_sendmsg does this for * us, also stated in the draft. -acme */ __kfree_skb(skb); return 0; } dccp_send_ack(sk); return -1; } out_invalid_packet: /* dccp_v4_do_rcv will send a reset */ DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; return 1; }
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr = (const struct ipv6hdr*)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); struct ipv6_pinfo *np; struct sock *sk; int err; struct tcp_sock *tp; __u32 seq; struct net *net = dev_net(skb->dev); sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex); if (sk == NULL) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; } if (sk->sk_state == TCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); return; } bh_lock_sock(sk); if (sock_owned_by_user(sk)) NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == TCP_CLOSE) goto out; if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) { NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); goto out; } tp = tcp_sk(sk); seq = ntohl(th->seq); if (sk->sk_state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } np = inet6_sk(sk); if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst; if (sock_owned_by_user(sk)) goto out; if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) goto out; dst = __sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = np->daddr; fl6.saddr = np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); if (IS_ERR(dst)) { sk->sk_err_soft = -PTR_ERR(dst); goto out; } } else dst_hold(dst); if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); } dst_release(dst); goto out; } icmpv6_err_convert(type, code, &err); switch (sk->sk_state) { struct request_sock *req, **prev; case TCP_LISTEN: if (sock_owned_by_user(sk)) goto out; req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr, &hdr->saddr, inet6_iif(skb)); if (!req) goto out; WARN_ON(req->sk != NULL); if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case TCP_SYN_SENT: case TCP_SYN_RECV: if (!sock_owned_by_user(sk)) { sk->sk_err = err; sk->sk_error_report(sk); tcp_done(sk); } else sk->sk_err_soft = err; goto out; } if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk->sk_error_report(sk); } else sk->sk_err_soft = err; out: bh_unlock_sock(sk); sock_put(sk); }
static int do_ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { struct inet_sock *inet = inet_sk(sk); int val = 0, err; switch (optname) { case IP_PKTINFO: case IP_RECVTTL: case IP_RECVOPTS: case IP_RECVTOS: case IP_RETOPTS: case IP_TOS: case IP_TTL: case IP_HDRINCL: case IP_MTU_DISCOVER: case IP_RECVERR: case IP_ROUTER_ALERT: case IP_FREEBIND: case IP_PASSSEC: case IP_TRANSPARENT: case IP_MINTTL: case IP_NODEFRAG: case IP_MULTICAST_TTL: case IP_MULTICAST_ALL: case IP_MULTICAST_LOOP: case IP_RECVORIGDSTADDR: if (optlen >= sizeof(int)) { if (get_user(val, (int __user *) optval)) return -EFAULT; } else if (optlen >= sizeof(char)) { unsigned char ucval; if (get_user(ucval, (unsigned char __user *) optval)) return -EFAULT; val = (int) ucval; } } /* If optlen==0, it is equivalent to val == 0 */ if (ip_mroute_opt(optname)) return ip_mroute_setsockopt(sk, optname, optval, optlen); err = 0; lock_sock(sk); switch (optname) { case IP_OPTIONS: { struct ip_options_rcu *old, *opt = NULL; if (optlen > 40) goto e_inval; err = ip_options_get_from_user(sock_net(sk), &opt, optval, optlen); if (err) break; old = rcu_dereference_protected(inet->inet_opt, sock_owned_by_user(sk)); if (inet->is_icsk) { struct inet_connection_sock *icsk = inet_csk(sk); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (sk->sk_family == PF_INET || (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && inet->inet_daddr != LOOPBACK4_IPV6)) { #endif if (old) icsk->icsk_ext_hdr_len -= old->opt.optlen; if (opt) icsk->icsk_ext_hdr_len += opt->opt.optlen; icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) } #endif } rcu_assign_pointer(inet->inet_opt, opt); if (old) call_rcu(&old->rcu, opt_kfree_rcu); break; } case IP_PKTINFO: if (val) inet->cmsg_flags |= IP_CMSG_PKTINFO; else inet->cmsg_flags &= ~IP_CMSG_PKTINFO; break; case IP_RECVTTL: if (val) inet->cmsg_flags |= IP_CMSG_TTL; else inet->cmsg_flags &= ~IP_CMSG_TTL; break; case IP_RECVTOS: if (val) inet->cmsg_flags |= IP_CMSG_TOS; else inet->cmsg_flags &= ~IP_CMSG_TOS; break; case IP_RECVOPTS: if (val) inet->cmsg_flags |= IP_CMSG_RECVOPTS; else inet->cmsg_flags &= ~IP_CMSG_RECVOPTS; break; case IP_RETOPTS: if (val) inet->cmsg_flags |= IP_CMSG_RETOPTS; else inet->cmsg_flags &= ~IP_CMSG_RETOPTS; break; case IP_PASSSEC: if (val) inet->cmsg_flags |= IP_CMSG_PASSSEC; else inet->cmsg_flags &= ~IP_CMSG_PASSSEC; break; case IP_RECVORIGDSTADDR: if (val) inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR; else inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR; break; case IP_TOS: /* This sets both TOS and Precedence */ if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; val |= inet->tos & INET_ECN_MASK; } if (inet->tos != val) { inet->tos = val; sk->sk_priority = rt_tos2priority(val); sk_dst_reset(sk); } break; case IP_TTL: if (optlen < 1) goto e_inval; if (val != -1 && (val < 1 || val > 255)) goto e_inval; inet->uc_ttl = val; break; case IP_HDRINCL: if (sk->sk_type != SOCK_RAW) { err = -ENOPROTOOPT; break; } inet->hdrincl = val ? 1 : 0; break; case IP_NODEFRAG: if (sk->sk_type != SOCK_RAW) { err = -ENOPROTOOPT; break; } inet->nodefrag = val ? 1 : 0; break; case IP_MTU_DISCOVER: if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) goto e_inval; inet->pmtudisc = val; break; case IP_RECVERR: inet->recverr = !!val; if (!val) skb_queue_purge(&sk->sk_error_queue); break; case IP_MULTICAST_TTL: if (sk->sk_type == SOCK_STREAM) goto e_inval; if (optlen < 1) goto e_inval; if (val == -1) val = 1; if (val < 0 || val > 255) goto e_inval; inet->mc_ttl = val; break; case IP_MULTICAST_LOOP: if (optlen < 1) goto e_inval; inet->mc_loop = !!val; break; case IP_MULTICAST_IF: { struct ip_mreqn mreq; struct net_device *dev = NULL; if (sk->sk_type == SOCK_STREAM) goto e_inval; /* * Check the arguments are allowable */ if (optlen < sizeof(struct in_addr)) goto e_inval; err = -EFAULT; if (optlen >= sizeof(struct ip_mreqn)) { if (copy_from_user(&mreq, optval, sizeof(mreq))) break; } else { memset(&mreq, 0, sizeof(mreq)); if (optlen >= sizeof(struct ip_mreq)) { if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq))) break; } else if (optlen >= sizeof(struct in_addr)) { if (copy_from_user(&mreq.imr_address, optval, sizeof(struct in_addr))) break; } } if (!mreq.imr_ifindex) { if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) { inet->mc_index = 0; inet->mc_addr = 0; err = 0; break; } dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr); if (dev) mreq.imr_ifindex = dev->ifindex; } else dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex); err = -EADDRNOTAVAIL; if (!dev) break; dev_put(dev); err = -EINVAL; if (sk->sk_bound_dev_if && mreq.imr_ifindex != sk->sk_bound_dev_if) break; inet->mc_index = mreq.imr_ifindex; inet->mc_addr = mreq.imr_address.s_addr; err = 0; break; } case IP_ADD_MEMBERSHIP: case IP_DROP_MEMBERSHIP: { struct ip_mreqn mreq; err = -EPROTO; if (inet_sk(sk)->is_icsk) break; if (optlen < sizeof(struct ip_mreq)) goto e_inval; err = -EFAULT; if (optlen >= sizeof(struct ip_mreqn)) { if (copy_from_user(&mreq, optval, sizeof(mreq))) break; } else { memset(&mreq, 0, sizeof(mreq)); if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq))) break; } if (optname == IP_ADD_MEMBERSHIP) err = ip_mc_join_group(sk, &mreq); else err = ip_mc_leave_group(sk, &mreq); break; } case IP_MSFILTER: { struct ip_msfilter *msf; if (optlen < IP_MSFILTER_SIZE(0)) goto e_inval; if (optlen > sysctl_optmem_max) { err = -ENOBUFS; break; } msf = kmalloc(optlen, GFP_KERNEL); if (!msf) { err = -ENOBUFS; break; } err = -EFAULT; if (copy_from_user(msf, optval, optlen)) { kfree(msf); break; } /* numsrc >= (1G-4) overflow in 32 bits */ if (msf->imsf_numsrc >= 0x3ffffffcU || msf->imsf_numsrc > sysctl_igmp_max_msf) { kfree(msf); err = -ENOBUFS; break; } if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) { kfree(msf); err = -EINVAL; break; } err = ip_mc_msfilter(sk, msf, 0); kfree(msf); break; } case IP_BLOCK_SOURCE: case IP_UNBLOCK_SOURCE: case IP_ADD_SOURCE_MEMBERSHIP: case IP_DROP_SOURCE_MEMBERSHIP: { struct ip_mreq_source mreqs; int omode, add; if (optlen != sizeof(struct ip_mreq_source)) goto e_inval; if (copy_from_user(&mreqs, optval, sizeof(mreqs))) { err = -EFAULT; break; } if (optname == IP_BLOCK_SOURCE) { omode = MCAST_EXCLUDE; add = 1; } else if (optname == IP_UNBLOCK_SOURCE) { omode = MCAST_EXCLUDE; add = 0; } else if (optname == IP_ADD_SOURCE_MEMBERSHIP) { struct ip_mreqn mreq; mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr; mreq.imr_address.s_addr = mreqs.imr_interface; mreq.imr_ifindex = 0; err = ip_mc_join_group(sk, &mreq); if (err && err != -EADDRINUSE) break; omode = MCAST_INCLUDE; add = 1; } else /* IP_DROP_SOURCE_MEMBERSHIP */ { omode = MCAST_INCLUDE; add = 0; } err = ip_mc_source(add, omode, sk, &mreqs, 0); break; } case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: { struct group_req greq; struct sockaddr_in *psin; struct ip_mreqn mreq; if (optlen < sizeof(struct group_req)) goto e_inval; err = -EFAULT; if (copy_from_user(&greq, optval, sizeof(greq))) break; psin = (struct sockaddr_in *)&greq.gr_group; if (psin->sin_family != AF_INET) goto e_inval; memset(&mreq, 0, sizeof(mreq)); mreq.imr_multiaddr = psin->sin_addr; mreq.imr_ifindex = greq.gr_interface; if (optname == MCAST_JOIN_GROUP) err = ip_mc_join_group(sk, &mreq); else err = ip_mc_leave_group(sk, &mreq); break; } case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: { struct group_source_req greqs; struct ip_mreq_source mreqs; struct sockaddr_in *psin; int omode, add; if (optlen != sizeof(struct group_source_req)) goto e_inval; if (copy_from_user(&greqs, optval, sizeof(greqs))) { err = -EFAULT; break; } if (greqs.gsr_group.ss_family != AF_INET || greqs.gsr_source.ss_family != AF_INET) { err = -EADDRNOTAVAIL; break; } psin = (struct sockaddr_in *)&greqs.gsr_group; mreqs.imr_multiaddr = psin->sin_addr.s_addr; psin = (struct sockaddr_in *)&greqs.gsr_source; mreqs.imr_sourceaddr = psin->sin_addr.s_addr; mreqs.imr_interface = 0; /* use index for mc_source */ if (optname == MCAST_BLOCK_SOURCE) { omode = MCAST_EXCLUDE; add = 1; } else if (optname == MCAST_UNBLOCK_SOURCE) { omode = MCAST_EXCLUDE; add = 0; } else if (optname == MCAST_JOIN_SOURCE_GROUP) { struct ip_mreqn mreq; psin = (struct sockaddr_in *)&greqs.gsr_group; mreq.imr_multiaddr = psin->sin_addr; mreq.imr_address.s_addr = 0; mreq.imr_ifindex = greqs.gsr_interface; err = ip_mc_join_group(sk, &mreq); if (err && err != -EADDRINUSE) break; greqs.gsr_interface = mreq.imr_ifindex; omode = MCAST_INCLUDE; add = 1; } else /* MCAST_LEAVE_SOURCE_GROUP */ { omode = MCAST_INCLUDE; add = 0; } err = ip_mc_source(add, omode, sk, &mreqs, greqs.gsr_interface); break; } case MCAST_MSFILTER: { struct sockaddr_in *psin; struct ip_msfilter *msf = NULL; struct group_filter *gsf = NULL; int msize, i, ifindex; if (optlen < GROUP_FILTER_SIZE(0)) goto e_inval; if (optlen > sysctl_optmem_max) { err = -ENOBUFS; break; } gsf = kmalloc(optlen, GFP_KERNEL); if (!gsf) { err = -ENOBUFS; break; } err = -EFAULT; if (copy_from_user(gsf, optval, optlen)) goto mc_msf_out; /* numsrc >= (4G-140)/128 overflow in 32 bits */ if (gsf->gf_numsrc >= 0x1ffffff || gsf->gf_numsrc > sysctl_igmp_max_msf) { err = -ENOBUFS; goto mc_msf_out; } if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) { err = -EINVAL; goto mc_msf_out; } msize = IP_MSFILTER_SIZE(gsf->gf_numsrc); msf = kmalloc(msize, GFP_KERNEL); if (!msf) { err = -ENOBUFS; goto mc_msf_out; } ifindex = gsf->gf_interface; psin = (struct sockaddr_in *)&gsf->gf_group; if (psin->sin_family != AF_INET) { err = -EADDRNOTAVAIL; goto mc_msf_out; } msf->imsf_multiaddr = psin->sin_addr.s_addr; msf->imsf_interface = 0; msf->imsf_fmode = gsf->gf_fmode; msf->imsf_numsrc = gsf->gf_numsrc; err = -EADDRNOTAVAIL; for (i = 0; i < gsf->gf_numsrc; ++i) { psin = (struct sockaddr_in *)&gsf->gf_slist[i]; if (psin->sin_family != AF_INET) goto mc_msf_out; msf->imsf_slist[i] = psin->sin_addr.s_addr; } kfree(gsf); gsf = NULL; err = ip_mc_msfilter(sk, msf, ifindex); mc_msf_out: kfree(msf); kfree(gsf); break; } case IP_MULTICAST_ALL: if (optlen < 1) goto e_inval; if (val != 0 && val != 1) goto e_inval; inet->mc_all = val; break; case IP_ROUTER_ALERT: err = ip_ra_control(sk, val ? 1 : 0, NULL); break; case IP_FREEBIND: if (optlen < 1) goto e_inval; inet->freebind = !!val; break; case IP_IPSEC_POLICY: case IP_XFRM_POLICY: err = -EPERM; if (!capable(CAP_NET_ADMIN)) break; err = xfrm_user_policy(sk, optname, optval, optlen); break; case IP_TRANSPARENT: if (!!val && !capable(CAP_NET_RAW) && !capable(CAP_NET_ADMIN)) { err = -EPERM; break; } if (optlen < 1) goto e_inval; inet->transparent = !!val; break; case IP_MINTTL: if (optlen < 1) goto e_inval; if (val < 0 || val > 255) goto e_inval; inet->min_ttl = val; break; default: err = -ENOPROTOOPT; break; } release_sock(sk); return err; e_inval: release_sock(sk); return -EINVAL; }
/* Save metrics learned by this TCP session. This function is called * only, when TCP finishes successfully i.e. when it enters TIME-WAIT * or goes from LAST-ACK to CLOSE. */ void tcp_update_metrics(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); struct dst_entry *dst = __sk_dst_get(sk); struct tcp_sock *tp = tcp_sk(sk); struct tcp_metrics_block *tm; unsigned long rtt; u32 val; int m; if (sysctl_tcp_nometrics_save || !dst) return; if (dst->flags & DST_HOST) dst_confirm(dst); rcu_read_lock(); if (icsk->icsk_backoff || !tp->srtt_us) { /* This session failed to estimate rtt. Why? * Probably, no packets returned in time. Reset our * results. */ tm = tcp_get_metrics(sk, dst, false); if (tm && !tcp_metric_locked(tm, TCP_METRIC_RTT)) tcp_metric_set(tm, TCP_METRIC_RTT, 0); goto out_unlock; } else tm = tcp_get_metrics(sk, dst, true); if (!tm) goto out_unlock; rtt = tcp_metric_get(tm, TCP_METRIC_RTT); m = rtt - tp->srtt_us; /* If newly calculated rtt larger than stored one, store new * one. Otherwise, use EWMA. Remember, rtt overestimation is * always better than underestimation. */ if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) { if (m <= 0) rtt = tp->srtt_us; else rtt -= (m >> 3); tcp_metric_set(tm, TCP_METRIC_RTT, rtt); } if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) { unsigned long var; if (m < 0) m = -m; /* Scale deviation to rttvar fixed point */ m >>= 1; if (m < tp->mdev_us) m = tp->mdev_us; var = tcp_metric_get(tm, TCP_METRIC_RTTVAR); if (m >= var) var = m; else var -= (var - m) >> 2; tcp_metric_set(tm, TCP_METRIC_RTTVAR, var); }
static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, bool *own_req) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *newnp; const struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_txoptions *opt; struct inet_sock *newinet; struct dccp6_sock *newdp6; struct sock *newsk; if (skb->protocol == htons(ETH_P_IP)) { /* * v6 mapped */ newsk = dccp_v4_request_recv_sock(sk, skb, req, dst, req_unhash, own_req); if (newsk == NULL) return NULL; newdp6 = (struct dccp6_sock *)newsk; newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); newnp->saddr = newsk->sk_v6_rcv_saddr; inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped; newsk->sk_backlog_rcv = dccp_v4_do_rcv; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; newnp->mcast_oif = inet_iif(skb); newnp->mcast_hops = ip_hdr(skb)->ttl; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count * here, dccp_create_openreq_child now does this for us, see the comment in * that function for the gory details. -acme */ /* It is tricky place. Until this moment IPv4 tcp worked with IPv6 icsk.icsk_af_ops. Sync it now. */ dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); return newsk; } if (sk_acceptq_is_full(sk)) goto out_overflow; if (!dst) { struct flowi6 fl6; dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_DCCP); if (!dst) goto out; } newsk = dccp_create_openreq_child(sk, req, skb); if (newsk == NULL) goto out_nonewsk; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks * count here, dccp_create_openreq_child now does this for us, see the * comment in that function for the gory details. -acme */ ip6_dst_store(newsk, dst, NULL, NULL); newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newdp6 = (struct dccp6_sock *)newsk; newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; newnp->saddr = ireq->ir_v6_loc_addr; newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; newsk->sk_bound_dev_if = ireq->ir_iif; /* Now IPv6 options... First: no IPv4 options. */ newinet->inet_opt = NULL; /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; /* * Clone native IPv6 options from listening socket (if any) * * Yes, keeping reference count would be much more clever, but we make * one more one thing there: reattach optmem to newsk. */ opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); if (opt) { opt = ipv6_dup_options(newsk, opt); RCU_INIT_POINTER(newnp->opt, opt); } inet_csk(newsk)->icsk_ext_hdr_len = 0; if (opt) inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + opt->opt_flen; dccp_sync_mss(newsk, dst_mtu(dst)); newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; newinet->inet_rcv_saddr = LOOPBACK4_IPV6; if (__inet_inherit_port(sk, newsk) < 0) { inet_csk_prepare_forced_close(newsk); dccp_done(newsk); goto out; } *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); /* Clone pktoptions received with SYN, if we own the req */ if (*own_req && ireq->pktopts) { newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); consume_skb(ireq->pktopts); ireq->pktopts = NULL; if (newnp->pktoptions) skb_set_owner_r(newnp->pktoptions, newsk); } return newsk; out_overflow: __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); out_nonewsk: dst_release(dst); out: __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; }
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p = NULL, final; struct flowi fl; struct dst_entry *dst; int addr_type; int err; if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; if (usin->sin6_family != AF_INET6) return(-EAFNOSUPPORT); memset(&fl, 0, sizeof(fl)); if (np->sndflow) { fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; IP6_ECN_flow_init(fl.fl6_flowlabel); if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); if (flowlabel == NULL) return -EINVAL; ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); fl6_sock_release(flowlabel); } } /* * connect() to INADDR_ANY means loopback (BSD'ism). */ if(ipv6_addr_any(&usin->sin6_addr)) usin->sin6_addr.s6_addr[15] = 0x1; addr_type = ipv6_addr_type(&usin->sin6_addr); if(addr_type & IPV6_ADDR_MULTICAST) return -ENETUNREACH; if (addr_type&IPV6_ADDR_LINKLOCAL) { if (addr_len >= sizeof(struct sockaddr_in6) && usin->sin6_scope_id) { /* If interface is set while binding, indices * must coincide. */ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != usin->sin6_scope_id) return -EINVAL; sk->sk_bound_dev_if = usin->sin6_scope_id; } /* Connect to link-local address requires an interface */ if (!sk->sk_bound_dev_if) return -EINVAL; } if (tp->rx_opt.ts_recent_stamp && !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) { tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; tp->write_seq = 0; } ipv6_addr_copy(&np->daddr, &usin->sin6_addr); np->flow_label = fl.fl6_flowlabel; /* * TCP over IPv4 */ if (addr_type == IPV6_ADDR_MAPPED) { u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); if (__ipv6_only_sock(sk)) return -ENETUNREACH; sin.sin_family = AF_INET; sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; icsk->icsk_af_ops = &ipv6_mapped; sk->sk_backlog_rcv = tcp_v4_do_rcv; err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); if (err) { icsk->icsk_ext_hdr_len = exthdrlen; icsk->icsk_af_ops = &ipv6_specific; sk->sk_backlog_rcv = tcp_v6_do_rcv; goto failure; } else { ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF), inet->saddr); ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF), inet->rcv_saddr); } return err; } if (!ipv6_addr_any(&np->rcv_saddr)) saddr = &np->rcv_saddr; fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, (saddr ? saddr : &np->saddr)); fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = usin->sin6_port; fl.fl_ip_sport = inet->sport; if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; ipv6_addr_copy(&final, &fl.fl6_dst); ipv6_addr_copy(&fl.fl6_dst, rt0->addr); final_p = &final; } err = ip6_dst_lookup(sk, &dst, &fl); if (err) goto failure; if (final_p) { ipv6_addr_copy(&fl.fl6_dst, final_p); fl.flags |= FLOWI_FLAG_NOTROUTE; } if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) goto failure; if (saddr == NULL) { saddr = &fl.fl6_src; ipv6_addr_copy(&np->rcv_saddr, saddr); } /* set the source address */ ipv6_addr_copy(&np->saddr, saddr); inet->rcv_saddr = LOOPBACK4_IPV6; ip6_dst_store(sk, dst, NULL); sk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); icsk->icsk_ext_hdr_len = 0; if (np->opt) icsk->icsk_ext_hdr_len = (np->opt->opt_flen + np->opt->opt_nflen); tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); inet->dport = usin->sin6_port; tcp_set_state(sk, TCP_SYN_SENT); err = inet6_hash_connect(&tcp_death_row, sk); if (err) goto late_failure; if (!tp->write_seq) tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, np->daddr.s6_addr32, inet->sport, inet->dport); err = tcp_connect(sk); if (err) goto late_failure; return 0; late_failure: tcp_set_state(sk, TCP_CLOSE); __sk_dst_reset(sk); failure: inet->dport = 0; sk->sk_route_caps = 0; return err; }
static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr; struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; struct ipv6_txoptions *opt; struct flowi6 fl6; struct dst_entry *dst; int addr_type; int err; dp->dccps_role = DCCP_ROLE_CLIENT; if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; memset(&fl6, 0, sizeof(fl6)); if (np->sndflow) { fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; IP6_ECN_flow_init(fl6.flowlabel); if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; fl6_sock_release(flowlabel); } } /* * connect() to INADDR_ANY means loopback (BSD'ism). */ if (ipv6_addr_any(&usin->sin6_addr)) usin->sin6_addr.s6_addr[15] = 1; addr_type = ipv6_addr_type(&usin->sin6_addr); if (addr_type & IPV6_ADDR_MULTICAST) return -ENETUNREACH; if (addr_type & IPV6_ADDR_LINKLOCAL) { if (addr_len >= sizeof(struct sockaddr_in6) && usin->sin6_scope_id) { /* If interface is set while binding, indices * must coincide. */ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != usin->sin6_scope_id) return -EINVAL; sk->sk_bound_dev_if = usin->sin6_scope_id; } /* Connect to link-local address requires an interface */ if (!sk->sk_bound_dev_if) return -EINVAL; } sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; /* * DCCP over IPv4 */ if (addr_type == IPV6_ADDR_MAPPED) { u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); if (__ipv6_only_sock(sk)) return -ENETUNREACH; sin.sin_family = AF_INET; sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; icsk->icsk_af_ops = &dccp_ipv6_mapped; sk->sk_backlog_rcv = dccp_v4_do_rcv; err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); if (err) { icsk->icsk_ext_hdr_len = exthdrlen; icsk->icsk_af_ops = &dccp_ipv6_af_ops; sk->sk_backlog_rcv = dccp_v6_do_rcv; goto failure; } np->saddr = sk->sk_v6_rcv_saddr; return err; } if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) saddr = &sk->sk_v6_rcv_saddr; fl6.flowi6_proto = IPPROTO_DCCP; fl6.daddr = sk->sk_v6_daddr; fl6.saddr = saddr ? *saddr : np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto failure; } if (saddr == NULL) { saddr = &fl6.saddr; sk->sk_v6_rcv_saddr = *saddr; } /* set the source address */ np->saddr = *saddr; inet->inet_rcv_saddr = LOOPBACK4_IPV6; ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; if (opt) icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; inet->inet_dport = usin->sin6_port; dccp_set_state(sk, DCCP_REQUESTING); err = inet6_hash_connect(&dccp_death_row, sk); if (err) goto late_failure; dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32, inet->inet_sport, inet->inet_dport); err = dccp_connect(sk); if (err) goto late_failure; return 0; late_failure: dccp_set_state(sk, DCCP_CLOSED); __sk_dst_reset(sk); failure: inet->inet_dport = 0; sk->sk_route_caps = 0; return err; }
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __u32 info) { struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); struct ipv6_pinfo *np; struct sock *sk; int err; struct tcp_sock *tp; __u32 seq; sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex); if (sk == NULL) { ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; } if (sk->sk_state == TCP_TIME_WAIT) { inet_twsk_put((struct inet_timewait_sock *)sk); return; } bh_lock_sock(sk); if (sock_owned_by_user(sk)) NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == TCP_CLOSE) goto out; tp = tcp_sk(sk); seq = ntohl(th->seq); if (sk->sk_state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) { NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); goto out; } np = inet6_sk(sk); if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst = NULL; if (sock_owned_by_user(sk)) goto out; if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) goto out; /* icmp should have updated the destination cache entry */ dst = __sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); struct flowi fl; /* BUGGG_FUTURE: Again, it is not clear how to handle rthdr case. Ignore this complexity for now. */ memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; if ((err = ip6_dst_lookup(sk, &dst, &fl))) { sk->sk_err_soft = -err; goto out; } if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { sk->sk_err_soft = -err; goto out; } } else dst_hold(dst); if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); } /* else let the usual retransmit timer handle it */ dst_release(dst); goto out; } icmpv6_err_convert(type, code, &err); /* Might be for an request_sock */ switch (sk->sk_state) { struct request_sock *req, **prev; case TCP_LISTEN: if (sock_owned_by_user(sk)) goto out; req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr, &hdr->saddr, inet6_iif(skb)); if (!req) goto out; /* ICMPs are not backlogged, hence we cannot get * an established socket here. */ BUG_TRAP(req->sk == NULL); if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); goto out; } inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case TCP_SYN_SENT: case TCP_SYN_RECV: /* Cannot happen. It can, it SYNs are crossed. --ANK */ if (!sock_owned_by_user(sk)) { TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); sk->sk_err = err; sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ tcp_done(sk); } else sk->sk_err_soft = err; goto out; } if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk->sk_error_report(sk); } else sk->sk_err_soft = err; out: bh_unlock_sock(sk); sock_put(sk); }
static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); struct dccp_sock *dp; struct ipv6_pinfo *np; struct sock *sk; int err; __u64 seq; struct net *net = dev_net(skb->dev); if (skb->len < offset + sizeof(*dh) || skb->len < offset + __dccp_basic_hdr_len(dh)) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; } sk = inet6_lookup(net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); if (sk == NULL) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; } if (sk->sk_state == DCCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); return; } bh_lock_sock(sk); if (sock_owned_by_user(sk)) NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; dp = dccp_sk(sk); seq = dccp_hdr_seq(dh); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_awl, dp->dccps_awh)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } np = inet6_sk(sk); if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst = NULL; if (sock_owned_by_user(sk)) goto out; if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED)) goto out; /* icmp should have updated the destination cache entry */ dst = __sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); struct flowi fl; /* BUGGG_FUTURE: Again, it is not clear how to handle rthdr case. Ignore this complexity for now. */ memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; security_sk_classify_flow(sk, &fl); err = ip6_dst_lookup(sk, &dst, &fl); if (err) { sk->sk_err_soft = -err; goto out; } err = xfrm_lookup(net, &dst, &fl, sk, 0); if (err < 0) { sk->sk_err_soft = -err; goto out; } } else dst_hold(dst); if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { dccp_sync_mss(sk, dst_mtu(dst)); } /* else let the usual retransmit timer handle it */ dst_release(dst); goto out; } icmpv6_err_convert(type, code, &err); /* Might be for an request_sock */ switch (sk->sk_state) { struct request_sock *req, **prev; case DCCP_LISTEN: if (sock_owned_by_user(sk)) goto out; req = inet6_csk_search_req(sk, &prev, dh->dccph_dport, &hdr->daddr, &hdr->saddr, inet6_iif(skb)); if (req == NULL) goto out; /* * ICMPs are not backlogged, hence we cannot get an established * socket here. */ WARN_ON(req->sk != NULL); if (seq != dccp_rsk(req)->dreq_iss) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case DCCP_REQUESTING: case DCCP_RESPOND: /* Cannot happen. It can, it SYNs are crossed. --ANK */ if (!sock_owned_by_user(sk)) { DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); sk->sk_err = err; /* * Wake people up to see the error * (see connect in sock.c) */ sk->sk_error_report(sk); dccp_done(sk); } else sk->sk_err_soft = err; goto out; } if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk->sk_error_report(sk); } else sk->sk_err_soft = err; out: bh_unlock_sock(sk); sock_put(sk); }
/* * All SKB's seen here are completely headerless. It is our * job to build the DCCP header, and pass the packet down to * IP so it can do the same plus pass the packet off to the * device. */ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) { if (likely(skb != NULL)) { struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct dccp_sock *dp = dccp_sk(sk); struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); struct dccp_hdr *dh; /* XXX For now we're using only 48 bits sequence numbers */ const u32 dccp_header_size = sizeof(*dh) + sizeof(struct dccp_hdr_ext) + dccp_packet_hdr_len(dcb->dccpd_type); int err, set_ack = 1; u64 ackno = dp->dccps_gsr; /* * Increment GSS here already in case the option code needs it. * Update GSS for real only if option processing below succeeds. */ dcb->dccpd_seq = ADD48(dp->dccps_gss, 1); switch (dcb->dccpd_type) { case DCCP_PKT_DATA: set_ack = 0; /* fall through */ case DCCP_PKT_DATAACK: case DCCP_PKT_RESET: break; case DCCP_PKT_REQUEST: set_ack = 0; /* Use ISS on the first (non-retransmitted) Request. */ if (icsk->icsk_retransmits == 0) dcb->dccpd_seq = dp->dccps_iss; /* fall through */ case DCCP_PKT_SYNC: case DCCP_PKT_SYNCACK: ackno = dcb->dccpd_ack_seq; /* fall through */ default: /* * Set owner/destructor: some skbs are allocated via * alloc_skb (e.g. when retransmission may happen). * Only Data, DataAck, and Reset packets should come * through here with skb->sk set. */ WARN_ON(skb->sk); skb_set_owner_w(skb, sk); break; } if (dccp_insert_options(sk, skb)) { kfree_skb(skb); return -EPROTO; } /* Build DCCP header and checksum it. */ dh = dccp_zeroed_hdr(skb, dccp_header_size); dh->dccph_type = dcb->dccpd_type; dh->dccph_sport = inet->inet_sport; dh->dccph_dport = inet->inet_dport; dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4; dh->dccph_ccval = dcb->dccpd_ccval; dh->dccph_cscov = dp->dccps_pcslen; /* XXX For now we're using only 48 bits sequence numbers */ dh->dccph_x = 1; dccp_update_gss(sk, dcb->dccpd_seq); dccp_hdr_set_seq(dh, dp->dccps_gss); if (set_ack) dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno); switch (dcb->dccpd_type) { case DCCP_PKT_REQUEST: dccp_hdr_request(skb)->dccph_req_service = dp->dccps_service; /* * Limit Ack window to ISS <= P.ackno <= GSS, so that * only Responses to Requests we sent are considered. */ dp->dccps_awl = dp->dccps_iss; break; case DCCP_PKT_RESET: dccp_hdr_reset(skb)->dccph_reset_code = dcb->dccpd_reset_code; break; } icsk->icsk_af_ops->send_check(sk, skb); if (set_ack) dccp_event_ack_sent(sk); DCCP_INC_STATS(DCCP_MIB_OUTSEGS); err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); return net_xmit_eval(err); } return -ENOBUFS; }
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) { struct dccp_sock *dp = dccp_sk(sk); struct dccp_minisock *dmsk = dccp_msk(sk); struct inet_connection_sock *icsk = inet_csk(sk); dccp_minisock_init(&dp->dccps_minisock); do_gettimeofday(&dp->dccps_epoch); /* * FIXME: We're hardcoding the CCID, and doing this at this point makes * the listening (master) sock get CCID control blocks, which is not * necessary, but for now, to not mess with the test userspace apps, * lets leave it here, later the real solution is to do this in a * setsockopt(CCIDs-I-want/accept). -acme */ if (likely(ctl_sock_initialized)) { int rc = dccp_feat_init(dmsk); if (rc) return rc; if (dmsk->dccpms_send_ack_vector) { dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL); if (dp->dccps_hc_rx_ackvec == NULL) return -ENOMEM; } dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid, sk, GFP_KERNEL); dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid, sk, GFP_KERNEL); if (unlikely(dp->dccps_hc_rx_ccid == NULL || dp->dccps_hc_tx_ccid == NULL)) { ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); if (dmsk->dccpms_send_ack_vector) { dccp_ackvec_free(dp->dccps_hc_rx_ackvec); dp->dccps_hc_rx_ackvec = NULL; } dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; return -ENOMEM; } } else { /* control socket doesn't need feat nego */ INIT_LIST_HEAD(&dmsk->dccpms_pending); INIT_LIST_HEAD(&dmsk->dccpms_conf); } dccp_init_xmit_timers(sk); icsk->icsk_rto = DCCP_TIMEOUT_INIT; icsk->icsk_syn_retries = sysctl_dccp_request_retries; sk->sk_state = DCCP_CLOSED; sk->sk_write_space = dccp_write_space; icsk->icsk_sync_mss = dccp_sync_mss; dp->dccps_mss_cache = 536; dp->dccps_role = DCCP_ROLE_UNDEFINED; dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; return 0; }
/* This will initiate an outgoing connection. */ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct rtable *rt; u32 daddr, nexthop; int tmp; int err; if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; if (usin->sin_family != AF_INET) return -EAFNOSUPPORT; nexthop = daddr = usin->sin_addr.s_addr; if (inet->opt && inet->opt->srr) { if (!daddr) return -EINVAL; nexthop = inet->opt->faddr; } tmp = ip_route_connect(&rt, nexthop, inet->saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_TCP, inet->sport, usin->sin_port, sk); if (tmp < 0) return tmp; if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { ip_rt_put(rt); return -ENETUNREACH; } if (!inet->opt || !inet->opt->srr) daddr = rt->rt_dst; if (!inet->saddr) inet->saddr = rt->rt_src; inet->rcv_saddr = inet->saddr; if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) { /* Reset inherited state */ tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; tp->write_seq = 0; } if (tcp_death_row.sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { struct inet_peer *peer = rt_get_peer(rt); /* VJ's idea. We save last timestamp seen from * the destination in peer table, when entering state TIME-WAIT * and initialize rx_opt.ts_recent from it, when trying new connection. */ if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) { tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; tp->rx_opt.ts_recent = peer->tcp_ts; } } inet->dport = usin->sin_port; inet->daddr = daddr; inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet->opt) inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; tp->rx_opt.mss_clamp = 536; /* Socket identity is still unknown (sport may be zero). * However we set state to SYN-SENT and not releasing socket * lock select source port, enter ourselves into the hash tables and * complete initialization after this. */ tcp_set_state(sk, TCP_SYN_SENT); err = inet_hash_connect(&tcp_death_row, sk); if (err) goto failure; err = ip_route_newports(&rt, IPPROTO_TCP, inet->sport, inet->dport, sk); if (err) goto failure; /* OK, now commit destination to socket. */ sk_setup_caps(sk, &rt->u.dst); if (!tp->write_seq) tp->write_seq = secure_tcp_sequence_number(inet->saddr, inet->daddr, inet->sport, usin->sin_port); inet->id = tp->write_seq ^ jiffies; err = tcp_connect(sk); rt = NULL; if (err) goto failure; return 0; failure: /* This unhashes the socket and releases the local port, if necessary. */ tcp_set_state(sk, TCP_CLOSE); ip_rt_put(rt); sk->sk_route_caps = 0; inet->dport = 0; return err; }
static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); int val, valbool; int retv = -ENOPROTOOPT; if (optval == NULL) val=0; else { if (optlen >= sizeof(int)) { if (get_user(val, (int __user *) optval)) return -EFAULT; } else val = 0; } valbool = (val!=0); if (ip6_mroute_opt(optname)) return ip6_mroute_setsockopt(sk, optname, optval, optlen); lock_sock(sk); switch (optname) { case IPV6_ADDRFORM: if (optlen < sizeof(int)) goto e_inval; if (val == PF_INET) { struct ipv6_txoptions *opt; struct sk_buff *pktopt; if (sk->sk_type == SOCK_RAW) break; if (sk->sk_protocol == IPPROTO_UDP || sk->sk_protocol == IPPROTO_UDPLITE) { struct udp_sock *up = udp_sk(sk); if (up->pending == AF_INET6) { retv = -EBUSY; break; } } else if (sk->sk_protocol != IPPROTO_TCP) break; if (sk->sk_state != TCP_ESTABLISHED) { retv = -ENOTCONN; break; } if (ipv6_only_sock(sk) || !ipv6_addr_v4mapped(&np->daddr)) { retv = -EADDRNOTAVAIL; break; } fl6_free_socklist(sk); ipv6_sock_mc_close(sk); /* * Sock is moving from IPv6 to IPv4 (sk_prot), so * remove it from the refcnt debug socks count in the * original family... */ sk_refcnt_debug_dec(sk); if (sk->sk_protocol == IPPROTO_TCP) { struct inet_connection_sock *icsk = inet_csk(sk); local_bh_disable(); sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, &tcp_prot, 1); local_bh_enable(); sk->sk_prot = &tcp_prot; icsk->icsk_af_ops = &ipv4_specific; sk->sk_socket->ops = &inet_stream_ops; sk->sk_family = PF_INET; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { struct proto *prot = &udp_prot; if (sk->sk_protocol == IPPROTO_UDPLITE) prot = &udplite_prot; local_bh_disable(); sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, prot, 1); local_bh_enable(); sk->sk_prot = prot; sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } opt = xchg(&np->opt, NULL); if (opt) sock_kfree_s(sk, opt, opt->tot_len); pktopt = xchg(&np->pktoptions, NULL); kfree_skb(pktopt); sk->sk_destruct = inet_sock_destruct; /* * ... and add it to the refcnt debug socks count * in the new family. -acme */ sk_refcnt_debug_inc(sk); module_put(THIS_MODULE); retv = 0; break; } goto e_inval; case IPV6_V6ONLY: if (optlen < sizeof(int) || inet_sk(sk)->inet_num) goto e_inval; np->ipv6only = valbool; retv = 0; break; case IPV6_RECVPKTINFO: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxinfo = valbool; retv = 0; break; case IPV6_2292PKTINFO: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxoinfo = valbool; retv = 0; break; case IPV6_RECVHOPLIMIT: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxhlim = valbool; retv = 0; break; case IPV6_2292HOPLIMIT: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxohlim = valbool; retv = 0; break; case IPV6_RECVRTHDR: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.srcrt = valbool; retv = 0; break; case IPV6_2292RTHDR: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.osrcrt = valbool; retv = 0; break; case IPV6_RECVHOPOPTS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.hopopts = valbool; retv = 0; break; case IPV6_2292HOPOPTS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.ohopopts = valbool; retv = 0; break; case IPV6_RECVDSTOPTS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.dstopts = valbool; retv = 0; break; case IPV6_2292DSTOPTS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.odstopts = valbool; retv = 0; break; case IPV6_TCLASS: if (optlen < sizeof(int)) goto e_inval; if (val < -1 || val > 0xff) goto e_inval; /* RFC 3542, 6.5: default traffic class of 0x0 */ if (val == -1) val = 0; np->tclass = val; retv = 0; break; case IPV6_RECVTCLASS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxtclass = valbool; retv = 0; break; case IPV6_FLOWINFO: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxflow = valbool; retv = 0; break; case IPV6_RECVPATHMTU: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxpmtu = valbool; retv = 0; break; case IPV6_TRANSPARENT: if (valbool && !capable(CAP_NET_ADMIN) && !capable(CAP_NET_RAW)) { retv = -EPERM; break; } if (optlen < sizeof(int)) goto e_inval; /* we don't have a separate transparent bit for IPV6 we use the one in the IPv4 socket */ inet_sk(sk)->transparent = valbool; retv = 0; break; case IPV6_RECVORIGDSTADDR: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxorigdstaddr = valbool; retv = 0; break; case IPV6_HOPOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: { struct ipv6_txoptions *opt; /* remove any sticky options header with a zero option * length, per RFC3542. */ if (optlen == 0) optval = NULL; else if (optval == NULL) goto e_inval; else if (optlen < sizeof(struct ipv6_opt_hdr) || optlen & 0x7 || optlen > 8 * 255) goto e_inval; /* hop-by-hop / destination options are privileged option */ retv = -EPERM; if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW)) break; opt = ipv6_renew_options(sk, np->opt, optname, (struct ipv6_opt_hdr __user *)optval, optlen); if (IS_ERR(opt)) { retv = PTR_ERR(opt); break; } /* routing header option needs extra check */ retv = -EINVAL; if (optname == IPV6_RTHDR && opt && opt->srcrt) { struct ipv6_rt_hdr *rthdr = opt->srcrt; switch (rthdr->type) { #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: if (rthdr->hdrlen != 2 || rthdr->segments_left != 1) goto sticky_done; break; #endif default: goto sticky_done; } } retv = 0; opt = ipv6_update_options(sk, opt); sticky_done: if (opt) sock_kfree_s(sk, opt, opt->tot_len); break; } case IPV6_PKTINFO: { struct in6_pktinfo pkt; if (optlen == 0) goto e_inval; else if (optlen < sizeof(struct in6_pktinfo) || optval == NULL) goto e_inval; if (copy_from_user(&pkt, optval, sizeof(struct in6_pktinfo))) { retv = -EFAULT; break; } if (sk->sk_bound_dev_if && pkt.ipi6_ifindex != sk->sk_bound_dev_if) goto e_inval; np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex; np->sticky_pktinfo.ipi6_addr = pkt.ipi6_addr; retv = 0; break; } case IPV6_2292PKTOPTIONS: { struct ipv6_txoptions *opt = NULL; struct msghdr msg; struct flowi6 fl6; int junk; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; if (optlen == 0) goto update; /* 1K is probably excessive * 1K is surely not enough, 2K per standard header is 16K. */ retv = -EINVAL; if (optlen > 64*1024) break; opt = sock_kmalloc(sk, sizeof(*opt) + optlen, GFP_KERNEL); retv = -ENOBUFS; if (opt == NULL) break; memset(opt, 0, sizeof(*opt)); opt->tot_len = sizeof(*opt) + optlen; retv = -EFAULT; if (copy_from_user(opt+1, optval, optlen)) goto done; msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, &junk, &junk); if (retv) goto done; update: retv = 0; opt = ipv6_update_options(sk, opt); done: if (opt) sock_kfree_s(sk, opt, opt->tot_len); break; } case IPV6_UNICAST_HOPS: if (optlen < sizeof(int)) goto e_inval; if (val > 255 || val < -1) goto e_inval; np->hop_limit = val; retv = 0; break; case IPV6_MULTICAST_HOPS: if (sk->sk_type == SOCK_STREAM) break; if (optlen < sizeof(int)) goto e_inval; if (val > 255 || val < -1) goto e_inval; np->mcast_hops = (val == -1 ? IPV6_DEFAULT_MCASTHOPS : val); retv = 0; break; case IPV6_MULTICAST_LOOP: if (optlen < sizeof(int)) goto e_inval; if (val != valbool) goto e_inval; np->mc_loop = valbool; retv = 0; break; case IPV6_UNICAST_IF: { struct net_device *dev = NULL; int ifindex; if (optlen != sizeof(int)) goto e_inval; ifindex = (__force int)ntohl((__force __be32)val); if (ifindex == 0) { np->ucast_oif = 0; retv = 0; break; } dev = dev_get_by_index(net, ifindex); retv = -EADDRNOTAVAIL; if (!dev) break; dev_put(dev); retv = -EINVAL; if (sk->sk_bound_dev_if) break; np->ucast_oif = ifindex; retv = 0; break; } case IPV6_MULTICAST_IF: if (sk->sk_type == SOCK_STREAM) break; if (optlen < sizeof(int)) goto e_inval; if (val) { struct net_device *dev; if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val) goto e_inval; dev = dev_get_by_index(net, val); if (!dev) { retv = -ENODEV; break; } dev_put(dev); } np->mcast_oif = val; retv = 0; break; case IPV6_ADD_MEMBERSHIP: case IPV6_DROP_MEMBERSHIP: { struct ipv6_mreq mreq; if (optlen < sizeof(struct ipv6_mreq)) goto e_inval; retv = -EPROTO; if (inet_sk(sk)->is_icsk) break; retv = -EFAULT; if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq))) break; if (optname == IPV6_ADD_MEMBERSHIP) retv = ipv6_sock_mc_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); else retv = ipv6_sock_mc_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); break; } case IPV6_JOIN_ANYCAST: case IPV6_LEAVE_ANYCAST: { struct ipv6_mreq mreq; if (optlen < sizeof(struct ipv6_mreq)) goto e_inval; retv = -EFAULT; if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq))) break; if (optname == IPV6_JOIN_ANYCAST) retv = ipv6_sock_ac_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr); else retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr); break; } case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: { struct group_req greq; struct sockaddr_in6 *psin6; if (optlen < sizeof(struct group_req)) goto e_inval; retv = -EFAULT; if (copy_from_user(&greq, optval, sizeof(struct group_req))) break; if (greq.gr_group.ss_family != AF_INET6) { retv = -EADDRNOTAVAIL; break; } psin6 = (struct sockaddr_in6 *)&greq.gr_group; if (optname == MCAST_JOIN_GROUP) retv = ipv6_sock_mc_join(sk, greq.gr_interface, &psin6->sin6_addr); else retv = ipv6_sock_mc_drop(sk, greq.gr_interface, &psin6->sin6_addr); break; } case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: { struct group_source_req greqs; int omode, add; if (optlen < sizeof(struct group_source_req)) goto e_inval; if (copy_from_user(&greqs, optval, sizeof(greqs))) { retv = -EFAULT; break; } if (greqs.gsr_group.ss_family != AF_INET6 || greqs.gsr_source.ss_family != AF_INET6) { retv = -EADDRNOTAVAIL; break; } if (optname == MCAST_BLOCK_SOURCE) { omode = MCAST_EXCLUDE; add = 1; } else if (optname == MCAST_UNBLOCK_SOURCE) { omode = MCAST_EXCLUDE; add = 0; } else if (optname == MCAST_JOIN_SOURCE_GROUP) { struct sockaddr_in6 *psin6; psin6 = (struct sockaddr_in6 *)&greqs.gsr_group; retv = ipv6_sock_mc_join(sk, greqs.gsr_interface, &psin6->sin6_addr); /* prior join w/ different source is ok */ if (retv && retv != -EADDRINUSE) break; omode = MCAST_INCLUDE; add = 1; } else /* MCAST_LEAVE_SOURCE_GROUP */ { omode = MCAST_INCLUDE; add = 0; } retv = ip6_mc_source(add, omode, sk, &greqs); break; } case MCAST_MSFILTER: { extern int sysctl_mld_max_msf; struct group_filter *gsf; if (optlen < GROUP_FILTER_SIZE(0)) goto e_inval; if (optlen > sysctl_optmem_max) { retv = -ENOBUFS; break; } gsf = kmalloc(optlen,GFP_KERNEL); if (!gsf) { retv = -ENOBUFS; break; } retv = -EFAULT; if (copy_from_user(gsf, optval, optlen)) { kfree(gsf); break; } /* numsrc >= (4G-140)/128 overflow in 32 bits */ if (gsf->gf_numsrc >= 0x1ffffffU || gsf->gf_numsrc > sysctl_mld_max_msf) { kfree(gsf); retv = -ENOBUFS; break; } if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) { kfree(gsf); retv = -EINVAL; break; } retv = ip6_mc_msfilter(sk, gsf); kfree(gsf); break; } case IPV6_ROUTER_ALERT: if (optlen < sizeof(int)) goto e_inval; retv = ip6_ra_control(sk, val); break; case IPV6_MTU_DISCOVER: if (optlen < sizeof(int)) goto e_inval; if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) goto e_inval; np->pmtudisc = val; retv = 0; break; case IPV6_MTU: if (optlen < sizeof(int)) goto e_inval; if (val && val < IPV6_MIN_MTU) goto e_inval; np->frag_size = val; retv = 0; break; case IPV6_RECVERR: if (optlen < sizeof(int)) goto e_inval; np->recverr = valbool; if (!val) skb_queue_purge(&sk->sk_error_queue); retv = 0; break; case IPV6_FLOWINFO_SEND: if (optlen < sizeof(int)) goto e_inval; np->sndflow = valbool; retv = 0; break; case IPV6_FLOWLABEL_MGR: retv = ipv6_flowlabel_opt(sk, optval, optlen); break; case IPV6_IPSEC_POLICY: case IPV6_XFRM_POLICY: retv = -EPERM; if (!capable(CAP_NET_ADMIN)) break; retv = xfrm_user_policy(sk, optname, optval, optlen); break; case IPV6_ADDR_PREFERENCES: { unsigned int pref = 0; unsigned int prefmask = ~0; if (optlen < sizeof(int)) goto e_inval; retv = -EINVAL; /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */ switch (val & (IPV6_PREFER_SRC_PUBLIC| IPV6_PREFER_SRC_TMP| IPV6_PREFER_SRC_PUBTMP_DEFAULT)) { case IPV6_PREFER_SRC_PUBLIC: pref |= IPV6_PREFER_SRC_PUBLIC; break; case IPV6_PREFER_SRC_TMP: pref |= IPV6_PREFER_SRC_TMP; break; case IPV6_PREFER_SRC_PUBTMP_DEFAULT: break; case 0: goto pref_skip_pubtmp; default: goto e_inval; } prefmask &= ~(IPV6_PREFER_SRC_PUBLIC| IPV6_PREFER_SRC_TMP); pref_skip_pubtmp: /* check HOME/COA conflicts */ switch (val & (IPV6_PREFER_SRC_HOME|IPV6_PREFER_SRC_COA)) { case IPV6_PREFER_SRC_HOME: break; case IPV6_PREFER_SRC_COA: pref |= IPV6_PREFER_SRC_COA; case 0: goto pref_skip_coa; default: goto e_inval; } prefmask &= ~IPV6_PREFER_SRC_COA; pref_skip_coa: /* check CGA/NONCGA conflicts */ switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) { case IPV6_PREFER_SRC_CGA: case IPV6_PREFER_SRC_NONCGA: case 0: break; default: goto e_inval; } np->srcprefs = (np->srcprefs & prefmask) | pref; retv = 0; break; } case IPV6_MINHOPCOUNT: if (optlen < sizeof(int)) goto e_inval; if (val < 0 || val > 255) goto e_inval; np->min_hopcount = val; retv = 0; break; case IPV6_DONTFRAG: np->dontfrag = valbool; retv = 0; break; } release_sock(sk); return retv; e_inval: release_sock(sk); return -EINVAL; }
static struct sock *tcp_fastopen_create_child(struct sock *sk, struct sk_buff *skb, struct dst_entry *dst, struct request_sock *req) { struct tcp_sock *tp; struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; struct sock *child; bool own_req; req->num_retrans = 0; req->num_timeout = 0; req->sk = NULL; child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, NULL, &own_req); if (!child) return NULL; spin_lock(&queue->fastopenq.lock); queue->fastopenq.qlen++; spin_unlock(&queue->fastopenq.lock); /* Initialize the child socket. Have to fix some values to take * into account the child is a Fast Open socket and is created * only out of the bits carried in the SYN packet. */ tp = tcp_sk(child); tp->fastopen_rsk = req; tcp_rsk(req)->tfo_listener = true; /* RFC1323: The window in SYN & SYN/ACK segments is never * scaled. So correct it appropriately. */ tp->snd_wnd = ntohs(tcp_hdr(skb)->window); /* Activate the retrans timer so that SYNACK can be retransmitted. * The request socket is not added to the ehash * because it's been added to the accept queue directly. */ inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, TCP_TIMEOUT_INIT, TCP_RTO_MAX); atomic_set(&req->rsk_refcnt, 2); /* Now finish processing the fastopen child socket. */ inet_csk(child)->icsk_af_ops->rebuild_header(child); tcp_init_congestion_control(child); tcp_mtup_init(child); tcp_init_metrics(child); tcp_init_buffer_space(child); tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; tcp_fastopen_add_skb(child, skb); tcp_rsk(req)->rcv_nxt = tp->rcv_nxt; tp->rcv_wup = tp->rcv_nxt; /* tcp_conn_request() is sending the SYNACK, * and queues the child into listener accept queue. */ return child; }