static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { struct rtable *rt; const struct iphdr *iph = ip_hdr(skb); struct flowi4 fl4 = { .flowi4_oif = inet_iif(skb), .daddr = iph->saddr, .saddr = iph->daddr, .flowi4_tos = RT_CONN_FLAGS(sk), .flowi4_proto = sk->sk_protocol, .fl4_sport = dccp_hdr(skb)->dccph_dport, .fl4_dport = dccp_hdr(skb)->dccph_sport, }; security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } return &rt->dst; } static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req) { int err = -1; struct sk_buff *skb; struct dst_entry *dst; struct flowi4 fl4; dst = inet_csk_route_req(sk, &fl4, req); if (dst == NULL) goto out; skb = dccp_make_response(sk, dst, req); if (skb != NULL) { const struct inet_request_sock *ireq = inet_rsk(req); struct dccp_hdr *dh = dccp_hdr(skb); dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); rcu_read_lock(); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, rcu_dereference(ireq->ireq_opt)); rcu_read_unlock(); err = net_xmit_eval(err); } out: dst_release(dst); return err; }
static int vrf_send_v4_prep(struct sk_buff *skb, struct flowi4 *fl4, struct net_device *vrf_dev) { struct rtable *rt; int err = 1; rt = ip_route_output_flow(dev_net(vrf_dev), fl4, NULL); if (IS_ERR(rt)) goto out; /* TO-DO: what about broadcast ? */ if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { ip_rt_put(rt); goto out; } skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); err = 0; out: return err; }
static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; struct rtable *rt = NULL; struct flowi4 fl4; int free = 0; __be32 daddr; __be32 saddr; u8 tos; int err; struct ip_options_data opt_copy; err = -EMSGSIZE; if (len > 0xFFFF) goto out; /* * Check the flags. */ err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message */ goto out; /* compatibility */ /* * Get and verify the address. */ if (msg->msg_namelen) { struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; err = -EINVAL; if (msg->msg_namelen < sizeof(*usin)) goto out; if (usin->sin_family != AF_INET) { pr_info_once("%s: %s forgot to set AF_INET. Fix it!\n", __func__, current->comm); err = -EAFNOSUPPORT; if (usin->sin_family) goto out; } daddr = usin->sin_addr.s_addr; /* ANK: I did not forget to get protocol from port field. * I just do not know, who uses this weirdness. * IP_HDRINCL is much more convenient. */ } else { err = -EDESTADDRREQ; if (sk->sk_state != TCP_ESTABLISHED) goto out; daddr = inet->inet_daddr; } ipc.addr = inet->inet_saddr; ipc.opt = NULL; ipc.tx_flags = 0; ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc); if (err) goto out; if (ipc.opt) free = 1; } saddr = ipc.addr; ipc.addr = daddr; if (!ipc.opt) { struct ip_options_rcu *inet_opt; rcu_read_lock(); inet_opt = rcu_dereference(inet->inet_opt); if (inet_opt) { memcpy(&opt_copy, inet_opt, sizeof(*inet_opt) + inet_opt->opt.optlen); ipc.opt = &opt_copy.opt; } rcu_read_unlock(); } if (ipc.opt) { err = -EINVAL; /* Linux does not mangle headers on raw sockets, * so that IP options + IP_HDRINCL is non-sense. */ if (inet->hdrincl) goto done; if (ipc.opt->opt.srr) { if (!daddr) goto done; daddr = ipc.opt->opt.faddr; } } tos = RT_CONN_FLAGS(sk); if (msg->msg_flags & MSG_DONTROUTE) tos |= RTO_ONLINK; if (ipv4_is_multicast(daddr)) { if (!ipc.oif) ipc.oif = inet->mc_index; if (!saddr) saddr = inet->mc_addr; } else if (!ipc.oif) ipc.oif = inet->uc_index; flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP | (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0, sock_i_uid(sk)); if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl4, msg); if (err) goto done; } security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; goto done; } err = -EACCES; if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST)) goto done; if (msg->msg_flags & MSG_CONFIRM) goto do_confirm; back_from_confirm: if (inet->hdrincl) err = raw_send_hdrinc(sk, &fl4, msg->msg_iov, len, &rt, msg->msg_flags); else { if (!ipc.addr) ipc.addr = fl4.daddr; lock_sock(sk); err = ip_append_data(sk, &fl4, ip_generic_getfrag, msg->msg_iov, len, 0, &ipc, &rt, msg->msg_flags); if (err) ip_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) { err = ip_push_pending_frames(sk, &fl4); if (err == -ENOBUFS && !inet->recverr) err = 0; } release_sock(sk); } done: if (free) kfree(ipc.opt); ip_rt_put(rt); out: if (err < 0) return err; return len; do_confirm: dst_confirm(&rt->dst); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; goto done; }
static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; struct rtable *rt = NULL; int free = 0; __be32 daddr; __be32 saddr; u8 tos; int err; err = -EMSGSIZE; if (len > 0xFFFF) goto out; /* * Check the flags. */ err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message */ goto out; /* compatibility */ /* * Get and verify the address. */ if (msg->msg_namelen) { struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; err = -EINVAL; if (msg->msg_namelen < sizeof(*usin)) goto out; if (usin->sin_family != AF_INET) { static int complained; if (!complained++) printk(KERN_INFO "%s forgot to set AF_INET in " "raw sendmsg. Fix it!\n", current->comm); err = -EAFNOSUPPORT; if (usin->sin_family) goto out; } daddr = usin->sin_addr.s_addr; /* ANK: I did not forget to get protocol from port field. * I just do not know, who uses this weirdness. * IP_HDRINCL is much more convenient. */ } else { err = -EDESTADDRREQ; if (sk->sk_state != TCP_ESTABLISHED) goto out; daddr = inet->inet_daddr; } ipc.addr = inet->inet_saddr; ipc.opt = NULL; ipc.tx_flags = 0; ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc); if (err) goto out; if (ipc.opt) free = 1; } saddr = ipc.addr; ipc.addr = daddr; if (!ipc.opt) ipc.opt = inet->opt; if (ipc.opt) { err = -EINVAL; /* Linux does not mangle headers on raw sockets, * so that IP options + IP_HDRINCL is non-sense. */ if (inet->hdrincl) goto done; if (ipc.opt->srr) { if (!daddr) goto done; daddr = ipc.opt->faddr; } } tos = RT_CONN_FLAGS(sk); if (msg->msg_flags & MSG_DONTROUTE) tos |= RTO_ONLINK; if (ipv4_is_multicast(daddr)) { if (!ipc.oif) ipc.oif = inet->mc_index; if (!saddr) saddr = inet->mc_addr; } { struct flowi4 fl4 = { .flowi4_oif = ipc.oif, .flowi4_mark = sk->sk_mark, .daddr = daddr, .saddr = saddr, .flowi4_tos = tos, .flowi4_proto = (inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol), .flowi4_flags = FLOWI_FLAG_CAN_SLEEP, }; if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl4, msg); if (err) goto done; } security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; goto done; } } err = -EACCES; if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST)) goto done; if (msg->msg_flags & MSG_CONFIRM) goto do_confirm; back_from_confirm: if (inet->hdrincl) err = raw_send_hdrinc(sk, msg->msg_iov, len, &rt, msg->msg_flags); else { if (!ipc.addr) ipc.addr = rt->rt_dst; lock_sock(sk); err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, &ipc, &rt, msg->msg_flags); if (err) ip_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) { err = ip_push_pending_frames(sk); if (err == -ENOBUFS && !inet->recverr) err = 0; } release_sock(sk); } done: if (free) kfree(ipc.opt); ip_rt_put(rt); out: if (err < 0) return err; return len; do_confirm: dst_confirm(&rt->dst); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; goto done; } static void raw_close(struct sock *sk, long timeout) { /* * Raw sockets may have direct kernel refereneces. Kill them. */ ip_ra_control(sk, 0, NULL); sk_common_release(sk); }
int xtnu_ip_route_output_key(void *net, struct rtable **rp, struct flowi *flp) { return ip_route_output_flow(rp, flp, NULL, 0); }
static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; struct rtable *rt = NULL; int free = 0; u32 daddr; u32 saddr; u8 tos; int err; err = -EMSGSIZE; if (len < 0 || len > 0xFFFF) goto out; /* * Check the flags. */ err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message */ goto out; /* compatibility */ /* * Get and verify the address. */ if (msg->msg_namelen) { struct sockaddr_in *usin = (struct sockaddr_in*)msg->msg_name; err = -EINVAL; if (msg->msg_namelen < sizeof(*usin)) goto out; if (usin->sin_family != AF_INET) { static int complained; if (!complained++) printk(KERN_INFO "%s forgot to set AF_INET in " "raw sendmsg. Fix it!\n", current->comm); err = -EAFNOSUPPORT; if (usin->sin_family) goto out; } daddr = usin->sin_addr.s_addr; /* ANK: I did not forget to get protocol from port field. * I just do not know, who uses this weirdness. * IP_HDRINCL is much more convenient. */ } else { err = -EDESTADDRREQ; if (sk->sk_state != TCP_ESTABLISHED) goto out; daddr = inet->daddr; } ipc.addr = inet->saddr; ipc.opt = NULL; ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { err = ip_cmsg_send(msg, &ipc); if (err) goto out; if (ipc.opt) free = 1; } saddr = ipc.addr; ipc.addr = daddr; if (!ipc.opt) ipc.opt = inet->opt; if (ipc.opt) { err = -EINVAL; /* Linux does not mangle headers on raw sockets, * so that IP options + IP_HDRINCL is non-sense. */ if (inet->hdrincl) goto done; if (ipc.opt->srr) { if (!daddr) goto done; daddr = ipc.opt->faddr; } } tos = RT_CONN_FLAGS(sk); if (msg->msg_flags & MSG_DONTROUTE) tos |= RTO_ONLINK; if (MULTICAST(daddr)) { if (!ipc.oif) ipc.oif = inet->mc_index; if (!saddr) saddr = inet->mc_addr; } { struct flowi fl = { .oif = ipc.oif, .nl_u = { .ip4_u = { .daddr = daddr, .saddr = saddr, .tos = tos } }, .proto = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, }; if (!inet->hdrincl) raw_probe_proto_opt(&fl, msg); err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); } if (err) goto done; err = -EACCES; if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST)) goto done; if (msg->msg_flags & MSG_CONFIRM) goto do_confirm; back_from_confirm: if (inet->hdrincl) err = raw_send_hdrinc(sk, msg->msg_iov, len, rt, msg->msg_flags); else { if (!ipc.addr) ipc.addr = rt->rt_dst; lock_sock(sk); err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, &ipc, rt, msg->msg_flags); if (err) ip_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) err = ip_push_pending_frames(sk); release_sock(sk); } done: if (free) kfree(ipc.opt); ip_rt_put(rt); out: return err < 0 ? err : len; do_confirm: dst_confirm(&rt->u.dst); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; goto done; }
static int mptp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) { int err; uint16_t dport; __be32 daddr; __be32 saddr; uint16_t sport; struct sk_buff *skb; struct sock *sk; struct inet_sock *isk; struct mptp_sock *ssk; struct mptphdr *shdr; int connected = 0; int totlen; struct rtable *rt = NULL; int dests = 0; int i; struct sockaddr_mptp *mptp_addr = NULL; int ret = 0; if (unlikely(sock == NULL)) { log_error("Sock is NULL\n"); err = -EINVAL; goto out; } sk = sock->sk; if (unlikely(sk == NULL)) { log_error("Sock->sk is NULL\n"); err = -EINVAL; goto out; } isk = inet_sk(sk); ssk = mptp_sk(sk); sport = ssk->src; saddr = isk->inet_saddr; if (sport == 0) { sport = get_next_free_port(); if (unlikely(sport == 0)) { log_error("No free ports\n"); err = -ENOMEM; goto out; } } if (msg->msg_name) { mptp_addr = (struct sockaddr_mptp *)msg->msg_name; if (unlikely (msg->msg_namelen < sizeof(*mptp_addr) + mptp_addr->count * sizeof(struct mptp_dest) || mptp_addr->count <= 0)) { log_error ("Invalid size for msg_name (size=%u, addr_count=%u)\n", msg->msg_namelen, mptp_addr->count); err = -EINVAL; goto out; } dests = mptp_addr->count; } else { BUG(); if (unlikely(!ssk->dst || !isk->inet_daddr)) { log_error("No destination port/address\n"); err = -EDESTADDRREQ; goto out; } dport = ssk->dst; daddr = isk->inet_daddr; log_debug ("Got from socket destination port=%u and address=%u\n", dport, daddr); connected = 1; } if (msg->msg_iovlen < dests) dests = msg->msg_iovlen; for (i = 0; i < dests; i++) { struct mptp_dest *dest = &mptp_addr->dests[i]; struct iovec *iov = &msg->msg_iov[i]; char *payload; #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 39) struct flowi fl = {}; #endif dport = ntohs(dest->port); if (unlikely(dport == 0 || dport >= MAX_MPTP_PORT)) { log_error("Invalid value for destination port(%u)\n", dport); err = -EINVAL; goto out; } daddr = dest->addr; #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 39) fl.u.ip4.saddr = saddr; fl.u.ip4.daddr = daddr; fl.flowi_proto = sk->sk_protocol; fl.flowi_flags = inet_sk_flowi_flags(sk); #endif log_debug ("Received from user space destination port=%u and address=%u\n", dport, daddr); len = iov->iov_len; totlen = len + sizeof(struct mptphdr) + sizeof(struct iphdr); skb = sock_alloc_send_skb(sk, totlen, msg->msg_flags & MSG_DONTWAIT, &err); if (unlikely(!skb)) { log_error("sock_alloc_send_skb failed\n"); goto out; } log_debug("Allocated %u bytes for skb (payload size=%u)\n", totlen, len); skb_reset_network_header(skb); skb_reserve(skb, sizeof(struct iphdr)); log_debug("Reseted network header\n"); skb_reset_transport_header(skb); skb_put(skb, sizeof(struct mptphdr)); log_debug("Reseted transport header\n"); shdr = (struct mptphdr *)skb_transport_header(skb); shdr->dst = htons(dport); shdr->src = htons(sport); shdr->len = htons(len + sizeof(struct mptphdr)); payload = skb_put(skb, len); log_debug("payload=%p\n", payload); err = skb_copy_datagram_from_iovec(skb, sizeof(struct mptphdr), iov, 0, len); if (unlikely(err)) { log_error("skb_copy_datagram_from_iovec failed\n"); goto out_free; } log_debug("Copied %u bytes into the skb\n", len); if (connected) rt = (struct rtable *)__sk_dst_check(sk, 0); if (rt == NULL) { log_debug("rt == NULL\n"); #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) struct flowi fl = {.fl4_dst = daddr, .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), }; err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0); if (unlikely(err)) { log_error("Route lookup failed\n"); goto out_free; } #else rt = ip_route_output_flow(sock_net(sk), &fl.u.ip4, sk); log_debug("rt = %p\n", rt); if (IS_ERR(rt)) { log_error("Route lookup failed\n"); goto out_free; } #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) sk_dst_set(sk, dst_clone(&rt->u.dst)); #else sk_dst_set(sk, dst_clone(&rt->dst)); #endif } log_debug("rt != NULL\n"); skb->local_df = 1; #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) err = ip_queue_xmit(skb); #else err = ip_queue_xmit(skb, &fl); #endif if (likely(!err)) { log_debug("Sent %u bytes on wire\n", len); ret += len; dest->bytes = len; } else { log_error("ip_queue_xmit failed\n"); dest->bytes = -1; } } return ret; out_free: kfree(skb); out: return err; }
static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, struct net_device *vrf_dev) { struct iphdr *ip4h = ip_hdr(skb); int ret = NET_XMIT_DROP; struct flowi4 fl4 = { /* needed to match OIF rule */ .flowi4_oif = vrf_dev->ifindex, .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_tos = RT_TOS(ip4h->tos), .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF, .flowi4_proto = ip4h->protocol, .daddr = ip4h->daddr, .saddr = ip4h->saddr, }; struct net *net = dev_net(vrf_dev); struct rtable *rt; rt = ip_route_output_flow(net, &fl4, NULL); if (IS_ERR(rt)) goto err; skb_dst_drop(skb); /* if dst.dev is loopback or the VRF device again this is locally * originated traffic destined to a local address. Short circuit * to Rx path using our local dst */ if (rt->dst.dev == net->loopback_dev || rt->dst.dev == vrf_dev) { struct net_vrf *vrf = netdev_priv(vrf_dev); struct rtable *rth_local; struct dst_entry *dst = NULL; ip_rt_put(rt); rcu_read_lock(); rth_local = rcu_dereference(vrf->rth_local); if (likely(rth_local)) { dst = &rth_local->dst; dst_hold(dst); } rcu_read_unlock(); if (unlikely(!dst)) goto err; return vrf_local_xmit(skb, vrf_dev, dst); } skb_dst_set(skb, &rt->dst); /* strip the ethernet header added for pass through VRF device */ __skb_pull(skb, skb_network_offset(skb)); if (!ip4h->saddr) { ip4h->saddr = inet_select_addr(skb_dst(skb)->dev, 0, RT_SCOPE_LINK); } ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); if (unlikely(net_xmit_eval(ret))) vrf_dev->stats.tx_errors++; else ret = NET_XMIT_SUCCESS; out: return ret; err: vrf_tx_error(vrf_dev, skb); goto out; } static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev) { switch (skb->protocol) { case htons(ETH_P_IP): return vrf_process_v4_outbound(skb, dev); case htons(ETH_P_IPV6): return vrf_process_v6_outbound(skb, dev); default: vrf_tx_error(dev, skb); return NET_XMIT_DROP; } } static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) { netdev_tx_t ret = is_ip_tx_frame(skb, dev); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); dstats->tx_pkts++; dstats->tx_bytes += skb->len; u64_stats_update_end(&dstats->syncp); } else { this_cpu_inc(dev->dstats->tx_drps); } return ret; }